#
# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.14-xen0
-# Tue Jan 31 18:56:38 2006
+# Linux kernel version: 2.6.15-xen0
+# Wed Feb 1 15:54:13 2006
#
-CONFIG_X86=y
+CONFIG_X86_32=y
CONFIG_SEMAPHORE_SLEEPERS=y
+CONFIG_X86=y
CONFIG_MMU=y
CONFIG_UID16=y
CONFIG_GENERIC_ISA_DMA=y
CONFIG_KOBJECT_UEVENT=y
# CONFIG_IKCONFIG is not set
CONFIG_INITRAMFS_SOURCE=""
+# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
# CONFIG_EMBEDDED is not set
CONFIG_KALLSYMS=y
# CONFIG_KALLSYMS_ALL is not set
# CONFIG_MODULE_SRCVERSION_ALL is not set
CONFIG_KMOD=y
+#
+# Block layer
+#
+# CONFIG_LBD is not set
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+CONFIG_IOSCHED_AS=y
+CONFIG_IOSCHED_DEADLINE=y
+CONFIG_IOSCHED_CFQ=y
+CONFIG_DEFAULT_AS=y
+# CONFIG_DEFAULT_DEADLINE is not set
+# CONFIG_DEFAULT_CFQ is not set
+# CONFIG_DEFAULT_NOOP is not set
+CONFIG_DEFAULT_IOSCHED="anticipatory"
+
#
# Processor type and features
#
CONFIG_X86_INVLPG=y
CONFIG_X86_BSWAP=y
CONFIG_X86_POPAD_OK=y
+CONFIG_X86_CMPXCHG64=y
CONFIG_X86_GOOD_APIC=y
CONFIG_X86_USE_PPRO_CHECKSUM=y
+CONFIG_X86_TSC=y
# CONFIG_SMP is not set
CONFIG_PREEMPT_NONE=y
# CONFIG_PREEMPT_VOLUNTARY is not set
CONFIG_FLATMEM=y
CONFIG_FLAT_NODE_MEM_MAP=y
# CONFIG_SPARSEMEM_STATIC is not set
+CONFIG_SPLIT_PTLOCK_CPUS=4096
CONFIG_MTRR=y
# CONFIG_REGPARM is not set
CONFIG_SECCOMP=y
CONFIG_NETFILTER=y
# CONFIG_NETFILTER_DEBUG is not set
CONFIG_BRIDGE_NETFILTER=y
+
+#
+# Core Netfilter Configuration
+#
# CONFIG_NETFILTER_NETLINK is not set
#
# CONFIG_NET_DIVERT is not set
# CONFIG_ECONET is not set
# CONFIG_WAN_ROUTER is not set
+
+#
+# QoS and/or fair queueing
+#
# CONFIG_NET_SCHED is not set
-# CONFIG_NET_CLS_ROUTE is not set
#
# Network testing
CONFIG_BLK_DEV_RAM_COUNT=16
CONFIG_BLK_DEV_RAM_SIZE=4096
CONFIG_BLK_DEV_INITRD=y
-# CONFIG_LBD is not set
# CONFIG_CDROM_PKTCDVD is not set
-
-#
-# IO Schedulers
-#
-CONFIG_IOSCHED_NOOP=y
-CONFIG_IOSCHED_AS=y
-CONFIG_IOSCHED_DEADLINE=y
-CONFIG_IOSCHED_CFQ=y
# CONFIG_ATA_OVER_ETH is not set
#
# CONFIG_BLK_DEV_CY82C693 is not set
# CONFIG_BLK_DEV_CS5520 is not set
# CONFIG_BLK_DEV_CS5530 is not set
+# CONFIG_BLK_DEV_CS5535 is not set
# CONFIG_BLK_DEV_HPT34X is not set
# CONFIG_BLK_DEV_HPT366 is not set
# CONFIG_BLK_DEV_SC1200 is not set
#
# SCSI low-level drivers
#
+# CONFIG_ISCSI_TCP is not set
CONFIG_BLK_DEV_3W_XXXX_RAID=y
# CONFIG_SCSI_3W_9XXX is not set
# CONFIG_SCSI_ACARD is not set
CONFIG_SCSI_ATA_PIIX=y
# CONFIG_SCSI_SATA_MV is not set
# CONFIG_SCSI_SATA_NV is not set
-CONFIG_SCSI_SATA_PROMISE=y
+# CONFIG_SCSI_PDC_ADMA is not set
# CONFIG_SCSI_SATA_QSTOR is not set
+CONFIG_SCSI_SATA_PROMISE=y
CONFIG_SCSI_SATA_SX4=y
CONFIG_SCSI_SATA_SIL=y
+CONFIG_SCSI_SATA_SIL24=y
# CONFIG_SCSI_SATA_SIS is not set
# CONFIG_SCSI_SATA_ULI is not set
# CONFIG_SCSI_SATA_VIA is not set
# CONFIG_SCSI_SATA_VITESSE is not set
CONFIG_SCSI_SATA_INTEL_COMBINED=y
-# CONFIG_SCSI_CPQFCTS is not set
# CONFIG_SCSI_DMX3191D is not set
# CONFIG_SCSI_EATA_PIO is not set
# CONFIG_SCSI_FUTURE_DOMAIN is not set
# CONFIG_SCSI_INIA100 is not set
# CONFIG_SCSI_SYM53C8XX_2 is not set
# CONFIG_SCSI_IPR is not set
-# CONFIG_SCSI_QLOGIC_ISP is not set
# CONFIG_SCSI_QLOGIC_FC is not set
# CONFIG_SCSI_QLOGIC_1280 is not set
CONFIG_SCSI_QLA2XXX=y
#
# Serial drivers
#
-# CONFIG_SERIAL_8250 is not set
#
# Non-8250 serial port support
# TPM devices
#
# CONFIG_TCG_TPM is not set
+# CONFIG_TELCLOCK is not set
#
# I2C support
#
# USB Device Class drivers
#
-# CONFIG_USB_BLUETOOTH_TTY is not set
# CONFIG_USB_ACM is not set
# CONFIG_USB_PRINTER is not set
#
-# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' may also be needed; see USB_STORAGE Help for more information
+# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
+#
+
+#
+# may also be needed; see USB_STORAGE Help for more information
#
# CONFIG_USB_STORAGE is not set
# CONFIG_NLS_KOI8_U is not set
# CONFIG_NLS_UTF8 is not set
+#
+# Instrumentation Support
+#
+# CONFIG_KPROBES is not set
+
#
# Kernel hacking
#
CONFIG_DEBUG_BUGVERBOSE=y
# CONFIG_DEBUG_INFO is not set
# CONFIG_DEBUG_FS is not set
+# CONFIG_DEBUG_VM is not set
CONFIG_FRAME_POINTER=y
+# CONFIG_RCU_TORTURE_TEST is not set
CONFIG_EARLY_PRINTK=y
# CONFIG_DEBUG_STACKOVERFLOW is not set
-# CONFIG_KPROBES is not set
# CONFIG_DEBUG_STACK_USAGE is not set
# CONFIG_DEBUG_PAGEALLOC is not set
# CONFIG_4KSTACKS is not set
CONFIG_GENERIC_HARDIRQS=y
CONFIG_GENERIC_IRQ_PROBE=y
CONFIG_X86_BIOS_REBOOT=y
-CONFIG_PC=y
#
# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.14-xen0
-# Tue Jan 31 16:21:00 2006
+# Linux kernel version: 2.6.15-xen0
+# Wed Feb 1 15:50:08 2006
#
CONFIG_X86_64=y
CONFIG_64BIT=y
CONFIG_KOBJECT_UEVENT=y
# CONFIG_IKCONFIG is not set
CONFIG_INITRAMFS_SOURCE=""
+# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
# CONFIG_EMBEDDED is not set
CONFIG_KALLSYMS=y
# CONFIG_KALLSYMS_ALL is not set
# CONFIG_MODULE_SRCVERSION_ALL is not set
CONFIG_KMOD=y
+#
+# Block layer
+#
+# CONFIG_LBD is not set
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+CONFIG_IOSCHED_AS=y
+CONFIG_IOSCHED_DEADLINE=y
+CONFIG_IOSCHED_CFQ=y
+CONFIG_DEFAULT_AS=y
+# CONFIG_DEFAULT_DEADLINE is not set
+# CONFIG_DEFAULT_CFQ is not set
+# CONFIG_DEFAULT_NOOP is not set
+CONFIG_DEFAULT_IOSCHED="anticipatory"
+
#
# Processor type and features
#
CONFIG_PREEMPT_NONE=y
# CONFIG_PREEMPT_VOLUNTARY is not set
# CONFIG_PREEMPT is not set
-# CONFIG_NUMA is not set
CONFIG_ARCH_FLATMEM_ENABLE=y
CONFIG_SELECT_MEMORY_MODEL=y
CONFIG_FLATMEM_MANUAL=y
CONFIG_FLATMEM=y
CONFIG_FLAT_NODE_MEM_MAP=y
# CONFIG_SPARSEMEM_STATIC is not set
+CONFIG_SPLIT_PTLOCK_CPUS=4096
CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID=y
CONFIG_SWIOTLB=y
CONFIG_DUMMY_IOMMU=y
CONFIG_NETFILTER=y
# CONFIG_NETFILTER_DEBUG is not set
CONFIG_BRIDGE_NETFILTER=y
+
+#
+# Core Netfilter Configuration
+#
# CONFIG_NETFILTER_NETLINK is not set
#
# CONFIG_NET_DIVERT is not set
# CONFIG_ECONET is not set
# CONFIG_WAN_ROUTER is not set
+
+#
+# QoS and/or fair queueing
+#
# CONFIG_NET_SCHED is not set
-# CONFIG_NET_CLS_ROUTE is not set
#
# Network testing
CONFIG_BLK_DEV_RAM_COUNT=16
CONFIG_BLK_DEV_RAM_SIZE=16384
CONFIG_BLK_DEV_INITRD=y
-# CONFIG_LBD is not set
# CONFIG_CDROM_PKTCDVD is not set
-
-#
-# IO Schedulers
-#
-CONFIG_IOSCHED_NOOP=y
-CONFIG_IOSCHED_AS=y
-CONFIG_IOSCHED_DEADLINE=y
-CONFIG_IOSCHED_CFQ=y
# CONFIG_ATA_OVER_ETH is not set
#
#
# SCSI low-level drivers
#
+# CONFIG_ISCSI_TCP is not set
CONFIG_BLK_DEV_3W_XXXX_RAID=y
# CONFIG_SCSI_3W_9XXX is not set
# CONFIG_SCSI_ACARD is not set
CONFIG_SCSI_ATA_PIIX=y
# CONFIG_SCSI_SATA_MV is not set
# CONFIG_SCSI_SATA_NV is not set
-CONFIG_SCSI_SATA_PROMISE=y
+# CONFIG_SCSI_PDC_ADMA is not set
# CONFIG_SCSI_SATA_QSTOR is not set
+CONFIG_SCSI_SATA_PROMISE=y
CONFIG_SCSI_SATA_SX4=y
CONFIG_SCSI_SATA_SIL=y
+CONFIG_SCSI_SATA_SIL24=y
# CONFIG_SCSI_SATA_SIS is not set
# CONFIG_SCSI_SATA_ULI is not set
# CONFIG_SCSI_SATA_VIA is not set
CONFIG_SCSI_SATA_INTEL_COMBINED=y
CONFIG_SCSI_BUSLOGIC=y
# CONFIG_SCSI_OMIT_FLASHPOINT is not set
-# CONFIG_SCSI_CPQFCTS is not set
# CONFIG_SCSI_DMX3191D is not set
# CONFIG_SCSI_EATA is not set
# CONFIG_SCSI_EATA_PIO is not set
# CONFIG_SCSI_INIA100 is not set
# CONFIG_SCSI_SYM53C8XX_2 is not set
# CONFIG_SCSI_IPR is not set
-# CONFIG_SCSI_QLOGIC_ISP is not set
# CONFIG_SCSI_QLOGIC_FC is not set
# CONFIG_SCSI_QLOGIC_1280 is not set
CONFIG_SCSI_QLA2XXX=y
#
# Serial drivers
#
-# CONFIG_SERIAL_8250 is not set
#
# Non-8250 serial port support
# TPM devices
#
# CONFIG_TCG_TPM is not set
+# CONFIG_TELCLOCK is not set
#
# I2C support
#
# USB Device Class drivers
#
-# CONFIG_USB_BLUETOOTH_TTY is not set
# CONFIG_USB_ACM is not set
# CONFIG_USB_PRINTER is not set
#
-# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' may also be needed; see USB_STORAGE Help for more information
+# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
+#
+
+#
+# may also be needed; see USB_STORAGE Help for more information
#
# CONFIG_USB_STORAGE is not set
CONFIG_INFINIBAND_IPOIB=y
CONFIG_INFINIBAND_IPOIB_DEBUG=y
CONFIG_INFINIBAND_IPOIB_DEBUG_DATA=y
+CONFIG_INFINIBAND_SRP=y
#
# SN Devices
# CONFIG_NLS_UTF8 is not set
#
-# Profiling support
+# Instrumentation Support
#
# CONFIG_PROFILING is not set
+# CONFIG_KPROBES is not set
#
# Kernel hacking
# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
# CONFIG_DEBUG_KOBJECT is not set
# CONFIG_DEBUG_FS is not set
+# CONFIG_DEBUG_VM is not set
CONFIG_FRAME_POINTER=y
-# CONFIG_CHECKING is not set
+# CONFIG_RCU_TORTURE_TEST is not set
# CONFIG_INIT_DEBUG is not set
-# CONFIG_KPROBES is not set
#
# Security options
#
# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.14-xenU
-# Tue Jan 31 18:57:16 2006
+# Linux kernel version: 2.6.15-xenU
+# Wed Feb 1 17:28:35 2006
#
-CONFIG_X86=y
+CONFIG_X86_32=y
CONFIG_SEMAPHORE_SLEEPERS=y
+CONFIG_X86=y
CONFIG_MMU=y
CONFIG_UID16=y
CONFIG_GENERIC_ISA_DMA=y
# CONFIG_IKCONFIG is not set
# CONFIG_CPUSETS is not set
CONFIG_INITRAMFS_SOURCE=""
+# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
# CONFIG_EMBEDDED is not set
CONFIG_KALLSYMS=y
# CONFIG_KALLSYMS_ALL is not set
CONFIG_KMOD=y
CONFIG_STOP_MACHINE=y
+#
+# Block layer
+#
+# CONFIG_LBD is not set
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+CONFIG_IOSCHED_AS=y
+CONFIG_IOSCHED_DEADLINE=y
+CONFIG_IOSCHED_CFQ=y
+CONFIG_DEFAULT_AS=y
+# CONFIG_DEFAULT_DEADLINE is not set
+# CONFIG_DEFAULT_CFQ is not set
+# CONFIG_DEFAULT_NOOP is not set
+CONFIG_DEFAULT_IOSCHED="anticipatory"
+
#
# Processor type and features
#
CONFIG_X86_INVLPG=y
CONFIG_X86_BSWAP=y
CONFIG_X86_POPAD_OK=y
+CONFIG_X86_CMPXCHG64=y
CONFIG_X86_GOOD_APIC=y
CONFIG_X86_USE_PPRO_CHECKSUM=y
+CONFIG_X86_TSC=y
CONFIG_SMP=y
CONFIG_SMP_ALTERNATIVES=y
CONFIG_NR_CPUS=8
CONFIG_FLATMEM=y
CONFIG_FLAT_NODE_MEM_MAP=y
# CONFIG_SPARSEMEM_STATIC is not set
+CONFIG_SPLIT_PTLOCK_CPUS=4096
# CONFIG_REGPARM is not set
CONFIG_SECCOMP=y
# CONFIG_HZ_100 is not set
# CONFIG_NET_DIVERT is not set
# CONFIG_ECONET is not set
# CONFIG_WAN_ROUTER is not set
+
+#
+# QoS and/or fair queueing
+#
# CONFIG_NET_SCHED is not set
-# CONFIG_NET_CLS_ROUTE is not set
#
# Network testing
CONFIG_BLK_DEV_RAM_COUNT=16
CONFIG_BLK_DEV_RAM_SIZE=4096
CONFIG_BLK_DEV_INITRD=y
-# CONFIG_LBD is not set
# CONFIG_CDROM_PKTCDVD is not set
-
-#
-# IO Schedulers
-#
-CONFIG_IOSCHED_NOOP=y
-CONFIG_IOSCHED_AS=y
-CONFIG_IOSCHED_DEADLINE=y
-CONFIG_IOSCHED_CFQ=y
# CONFIG_ATA_OVER_ETH is not set
#
#
# SCSI low-level drivers
#
+# CONFIG_ISCSI_TCP is not set
# CONFIG_SCSI_SATA is not set
# CONFIG_SCSI_DEBUG is not set
# CONFIG_NLS_KOI8_U is not set
# CONFIG_NLS_UTF8 is not set
+#
+# Instrumentation Support
+#
+# CONFIG_KPROBES is not set
+
#
# Kernel hacking
#
CONFIG_DEBUG_BUGVERBOSE=y
# CONFIG_DEBUG_INFO is not set
# CONFIG_DEBUG_FS is not set
+# CONFIG_DEBUG_VM is not set
CONFIG_FRAME_POINTER=y
+# CONFIG_RCU_TORTURE_TEST is not set
CONFIG_EARLY_PRINTK=y
# CONFIG_DEBUG_STACKOVERFLOW is not set
-# CONFIG_KPROBES is not set
# CONFIG_DEBUG_STACK_USAGE is not set
# CONFIG_DEBUG_PAGEALLOC is not set
# CONFIG_4KSTACKS is not set
CONFIG_X86_SMP=y
CONFIG_X86_BIOS_REBOOT=y
CONFIG_X86_TRAMPOLINE=y
-CONFIG_PC=y
#
# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.14-xenU
-# Tue Jan 31 19:51:18 2006
+# Linux kernel version: 2.6.15-xenU
+# Wed Feb 1 15:49:27 2006
#
CONFIG_X86_64=y
CONFIG_64BIT=y
# CONFIG_IKCONFIG is not set
# CONFIG_CPUSETS is not set
CONFIG_INITRAMFS_SOURCE=""
+# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
# CONFIG_EMBEDDED is not set
CONFIG_KALLSYMS=y
# CONFIG_KALLSYMS_ALL is not set
CONFIG_KMOD=y
CONFIG_STOP_MACHINE=y
+#
+# Block layer
+#
+CONFIG_LBD=y
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+CONFIG_IOSCHED_AS=y
+CONFIG_IOSCHED_DEADLINE=y
+CONFIG_IOSCHED_CFQ=y
+CONFIG_DEFAULT_AS=y
+# CONFIG_DEFAULT_DEADLINE is not set
+# CONFIG_DEFAULT_CFQ is not set
+# CONFIG_DEFAULT_NOOP is not set
+CONFIG_DEFAULT_IOSCHED="anticipatory"
+
#
# Processor type and features
#
# CONFIG_PREEMPT_VOLUNTARY is not set
# CONFIG_PREEMPT is not set
CONFIG_PREEMPT_BKL=y
-# CONFIG_NUMA is not set
CONFIG_ARCH_FLATMEM_ENABLE=y
CONFIG_SELECT_MEMORY_MODEL=y
CONFIG_FLATMEM_MANUAL=y
CONFIG_FLATMEM=y
CONFIG_FLAT_NODE_MEM_MAP=y
# CONFIG_SPARSEMEM_STATIC is not set
+CONFIG_SPLIT_PTLOCK_CPUS=4096
CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID=y
CONFIG_NR_CPUS=8
# CONFIG_HOTPLUG_CPU is not set
CONFIG_NETFILTER=y
# CONFIG_NETFILTER_DEBUG is not set
CONFIG_BRIDGE_NETFILTER=y
+
+#
+# Core Netfilter Configuration
+#
# CONFIG_NETFILTER_NETLINK is not set
#
CONFIG_NET_DIVERT=y
# CONFIG_ECONET is not set
CONFIG_WAN_ROUTER=m
+
+#
+# QoS and/or fair queueing
+#
CONFIG_NET_SCHED=y
CONFIG_NET_SCH_CLK_JIFFIES=y
# CONFIG_NET_SCH_CLK_GETTIMEOFDAY is not set
# CONFIG_NET_SCH_CLK_CPU is not set
+
+#
+# Queueing/Scheduling
+#
CONFIG_NET_SCH_CBQ=m
CONFIG_NET_SCH_HTB=m
CONFIG_NET_SCH_HFSC=m
CONFIG_NET_SCH_DSMARK=m
CONFIG_NET_SCH_NETEM=m
CONFIG_NET_SCH_INGRESS=m
-CONFIG_NET_QOS=y
-CONFIG_NET_ESTIMATOR=y
+
+#
+# Classification
+#
CONFIG_NET_CLS=y
# CONFIG_NET_CLS_BASIC is not set
CONFIG_NET_CLS_TCINDEX=m
CONFIG_NET_CLS_FW=m
CONFIG_NET_CLS_U32=m
CONFIG_CLS_U32_PERF=y
-CONFIG_NET_CLS_IND=y
# CONFIG_CLS_U32_MARK is not set
CONFIG_NET_CLS_RSVP=m
CONFIG_NET_CLS_RSVP6=m
# CONFIG_NET_EMATCH is not set
# CONFIG_NET_CLS_ACT is not set
CONFIG_NET_CLS_POLICE=y
+CONFIG_NET_CLS_IND=y
+CONFIG_NET_ESTIMATOR=y
#
# Network testing
CONFIG_BT_HCIUART=m
CONFIG_BT_HCIUART_H4=y
CONFIG_BT_HCIUART_BCSP=y
-CONFIG_BT_HCIUART_BCSP_TXCRC=y
CONFIG_BT_HCIVHCI=m
# CONFIG_IEEE80211 is not set
CONFIG_BLK_DEV_RAM_COUNT=16
CONFIG_BLK_DEV_RAM_SIZE=16384
CONFIG_BLK_DEV_INITRD=y
-CONFIG_LBD=y
# CONFIG_CDROM_PKTCDVD is not set
-
-#
-# IO Schedulers
-#
-CONFIG_IOSCHED_NOOP=y
-CONFIG_IOSCHED_AS=y
-CONFIG_IOSCHED_DEADLINE=y
-CONFIG_IOSCHED_CFQ=y
# CONFIG_ATA_OVER_ETH is not set
#
#
# SCSI low-level drivers
#
+# CONFIG_ISCSI_TCP is not set
CONFIG_SCSI_SATA=m
# CONFIG_SCSI_DEBUG is not set
#
# ATM drivers
#
+# CONFIG_ATM_DUMMY is not set
CONFIG_ATM_TCP=m
CONFIG_PPP=m
CONFIG_PPP_MULTILINK=y
CONFIG_PPP_SYNC_TTY=m
CONFIG_PPP_DEFLATE=m
# CONFIG_PPP_BSDCOMP is not set
+# CONFIG_PPP_MPPE is not set
CONFIG_PPPOE=m
CONFIG_PPPOATM=m
# CONFIG_SLIP is not set
CONFIG_FS_POSIX_ACL=y
CONFIG_XFS_FS=m
CONFIG_XFS_EXPORT=y
-CONFIG_XFS_QUOTA=m
+# CONFIG_XFS_QUOTA is not set
CONFIG_XFS_SECURITY=y
CONFIG_XFS_POSIX_ACL=y
# CONFIG_XFS_RT is not set
CONFIG_NLS_UTF8=m
#
-# Profiling support
+# Instrumentation Support
#
# CONFIG_PROFILING is not set
+# CONFIG_KPROBES is not set
#
# Kernel hacking
# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
# CONFIG_DEBUG_KOBJECT is not set
# CONFIG_DEBUG_FS is not set
+# CONFIG_DEBUG_VM is not set
CONFIG_FRAME_POINTER=y
+# CONFIG_RCU_TORTURE_TEST is not set
# CONFIG_INIT_DEBUG is not set
-# CONFIG_KPROBES is not set
#
# Security options
#
# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.14-xen
-# Tue Jan 31 19:01:58 2006
+# Linux kernel version: 2.6.15-xen
+# Wed Feb 1 17:28:24 2006
#
-CONFIG_X86=y
+CONFIG_X86_32=y
CONFIG_SEMAPHORE_SLEEPERS=y
+CONFIG_X86=y
CONFIG_MMU=y
CONFIG_UID16=y
CONFIG_GENERIC_ISA_DMA=y
# CONFIG_IKCONFIG is not set
# CONFIG_CPUSETS is not set
CONFIG_INITRAMFS_SOURCE=""
+# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_EMBEDDED=y
CONFIG_KALLSYMS=y
# CONFIG_KALLSYMS_ALL is not set
CONFIG_BASE_FULL=y
CONFIG_FUTEX=y
CONFIG_EPOLL=y
-# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
CONFIG_SHMEM=y
CONFIG_CC_ALIGN_FUNCTIONS=0
CONFIG_CC_ALIGN_LABELS=0
CONFIG_KMOD=y
CONFIG_STOP_MACHINE=y
+#
+# Block layer
+#
+CONFIG_LBD=y
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+CONFIG_IOSCHED_AS=y
+CONFIG_IOSCHED_DEADLINE=y
+CONFIG_IOSCHED_CFQ=y
+CONFIG_DEFAULT_AS=y
+# CONFIG_DEFAULT_DEADLINE is not set
+# CONFIG_DEFAULT_CFQ is not set
+# CONFIG_DEFAULT_NOOP is not set
+CONFIG_DEFAULT_IOSCHED="anticipatory"
+
#
# Processor type and features
#
CONFIG_X86_INVLPG=y
CONFIG_X86_BSWAP=y
CONFIG_X86_POPAD_OK=y
+CONFIG_X86_CMPXCHG64=y
CONFIG_X86_GOOD_APIC=y
CONFIG_X86_USE_PPRO_CHECKSUM=y
+CONFIG_X86_TSC=y
CONFIG_SMP=y
CONFIG_SMP_ALTERNATIVES=y
CONFIG_NR_CPUS=8
CONFIG_FLATMEM=y
CONFIG_FLAT_NODE_MEM_MAP=y
# CONFIG_SPARSEMEM_STATIC is not set
+CONFIG_SPLIT_PTLOCK_CPUS=4096
CONFIG_MTRR=y
# CONFIG_REGPARM is not set
CONFIG_SECCOMP=y
# CONFIG_HZ_1000 is not set
CONFIG_HZ=250
CONFIG_PHYSICAL_START=0x100000
-CONFIG_HOTPLUG_CPU=y
# CONFIG_CRASH_DUMP is not set
+CONFIG_HOTPLUG_CPU=y
#
# Power management options (ACPI, APM)
CONFIG_ACPI_HOTKEY=m
CONFIG_ACPI_FAN=m
CONFIG_ACPI_PROCESSOR=m
+CONFIG_ACPI_HOTPLUG_CPU=y
CONFIG_ACPI_THERMAL=m
CONFIG_ACPI_ASUS=m
CONFIG_ACPI_IBM=m
CONFIG_ACPI_POWER=y
CONFIG_ACPI_SYSTEM=y
# CONFIG_X86_PM_TIMER is not set
-# CONFIG_ACPI_CONTAINER is not set
+CONFIG_ACPI_CONTAINER=m
#
# CPU Frequency scaling
# CONFIG_PCI_LEGACY_PROC is not set
# CONFIG_PCI_DEBUG is not set
CONFIG_SCx200=m
-# CONFIG_HOTPLUG_CPU is not set
#
# PCCARD (PCMCIA/CardBus) support
CONFIG_NETFILTER=y
# CONFIG_NETFILTER_DEBUG is not set
CONFIG_BRIDGE_NETFILTER=y
+
+#
+# Core Netfilter Configuration
+#
CONFIG_NETFILTER_NETLINK=m
CONFIG_NETFILTER_NETLINK_QUEUE=m
CONFIG_NETFILTER_NETLINK_LOG=m
CONFIG_ECONET_AUNUDP=y
CONFIG_ECONET_NATIVE=y
CONFIG_WAN_ROUTER=m
+
+#
+# QoS and/or fair queueing
+#
CONFIG_NET_SCHED=y
CONFIG_NET_SCH_CLK_JIFFIES=y
# CONFIG_NET_SCH_CLK_GETTIMEOFDAY is not set
# CONFIG_NET_SCH_CLK_CPU is not set
+
+#
+# Queueing/Scheduling
+#
CONFIG_NET_SCH_CBQ=m
CONFIG_NET_SCH_HTB=m
CONFIG_NET_SCH_HFSC=m
CONFIG_NET_SCH_DSMARK=m
CONFIG_NET_SCH_NETEM=m
CONFIG_NET_SCH_INGRESS=m
-CONFIG_NET_QOS=y
-CONFIG_NET_ESTIMATOR=y
+
+#
+# Classification
+#
CONFIG_NET_CLS=y
CONFIG_NET_CLS_BASIC=m
CONFIG_NET_CLS_TCINDEX=m
CONFIG_NET_CLS_FW=m
CONFIG_NET_CLS_U32=m
# CONFIG_CLS_U32_PERF is not set
-# CONFIG_NET_CLS_IND is not set
# CONFIG_CLS_U32_MARK is not set
CONFIG_NET_CLS_RSVP=m
CONFIG_NET_CLS_RSVP6=m
CONFIG_NET_EMATCH_TEXT=m
# CONFIG_NET_CLS_ACT is not set
CONFIG_NET_CLS_POLICE=y
+# CONFIG_NET_CLS_IND is not set
+CONFIG_NET_ESTIMATOR=y
#
# Network testing
CONFIG_BT_HCIUART=m
CONFIG_BT_HCIUART_H4=y
CONFIG_BT_HCIUART_BCSP=y
-# CONFIG_BT_HCIUART_BCSP_TXCRC is not set
CONFIG_BT_HCIBCM203X=m
# CONFIG_BT_HCIBPA10X is not set
CONFIG_BT_HCIBFUSB=m
CONFIG_NFTL=m
CONFIG_NFTL_RW=y
CONFIG_INFTL=m
+CONFIG_RFD_FTL=m
#
# RAM/ROM/Flash chip drivers
# CONFIG_MTD_NAND_DISKONCHIP_BBTWRITE is not set
# CONFIG_MTD_NAND_NANDSIM is not set
+#
+# OneNAND Flash Device Drivers
+#
+CONFIG_MTD_ONENAND=m
+# CONFIG_MTD_ONENAND_VERIFY_WRITE is not set
+
#
# Parallel port support
#
CONFIG_BLK_DEV_RAM_COUNT=16
CONFIG_BLK_DEV_RAM_SIZE=16384
CONFIG_BLK_DEV_INITRD=y
-CONFIG_LBD=y
CONFIG_CDROM_PKTCDVD=m
CONFIG_CDROM_PKTCDVD_BUFFERS=8
# CONFIG_CDROM_PKTCDVD_WCACHE is not set
-
-#
-# IO Schedulers
-#
-CONFIG_IOSCHED_NOOP=y
-CONFIG_IOSCHED_AS=y
-CONFIG_IOSCHED_DEADLINE=y
-CONFIG_IOSCHED_CFQ=y
CONFIG_ATA_OVER_ETH=m
#
CONFIG_BLK_DEV_CY82C693=y
CONFIG_BLK_DEV_CS5520=y
CONFIG_BLK_DEV_CS5530=y
+CONFIG_BLK_DEV_CS5535=m
CONFIG_BLK_DEV_HPT34X=y
# CONFIG_HPT34X_AUTODMA is not set
CONFIG_BLK_DEV_HPT366=y
#
CONFIG_SCSI_SPI_ATTRS=m
CONFIG_SCSI_FC_ATTRS=m
-# CONFIG_SCSI_ISCSI_ATTRS is not set
+CONFIG_SCSI_ISCSI_ATTRS=m
CONFIG_SCSI_SAS_ATTRS=m
#
# SCSI low-level drivers
#
+CONFIG_ISCSI_TCP=m
CONFIG_BLK_DEV_3W_XXXX_RAID=m
CONFIG_SCSI_3W_9XXX=m
CONFIG_SCSI_ACARD=m
CONFIG_SCSI_ATA_PIIX=m
CONFIG_SCSI_SATA_MV=m
CONFIG_SCSI_SATA_NV=m
-CONFIG_SCSI_SATA_PROMISE=m
+CONFIG_SCSI_PDC_ADMA=m
# CONFIG_SCSI_SATA_QSTOR is not set
+CONFIG_SCSI_SATA_PROMISE=m
CONFIG_SCSI_SATA_SX4=m
CONFIG_SCSI_SATA_SIL=m
+CONFIG_SCSI_SATA_SIL24=m
CONFIG_SCSI_SATA_SIS=m
CONFIG_SCSI_SATA_ULI=m
CONFIG_SCSI_SATA_VIA=m
CONFIG_SCSI_SATA_VITESSE=m
CONFIG_SCSI_SATA_INTEL_COMBINED=y
-# CONFIG_SCSI_CPQFCTS is not set
CONFIG_SCSI_DMX3191D=m
CONFIG_SCSI_EATA_PIO=m
CONFIG_SCSI_FUTURE_DOMAIN=m
CONFIG_SCSI_IPR=m
# CONFIG_SCSI_IPR_TRACE is not set
# CONFIG_SCSI_IPR_DUMP is not set
-CONFIG_SCSI_QLOGIC_ISP=m
CONFIG_SCSI_QLOGIC_FC=m
CONFIG_SCSI_QLOGIC_FC_FIRMWARE=y
CONFIG_SCSI_QLOGIC_1280=m
-CONFIG_SCSI_QLOGIC_1280_1040=y
CONFIG_SCSI_QLA2XXX=m
CONFIG_SCSI_QLA21XX=m
CONFIG_SCSI_QLA22XX=m
# PHY device support
#
CONFIG_PHYLIB=m
-CONFIG_PHYCONTROL=y
#
# MII PHY device drivers
# CONFIG_IXGB_NAPI is not set
CONFIG_S2IO=m
# CONFIG_S2IO_NAPI is not set
-# CONFIG_2BUFF_MODE is not set
#
# Token Ring devices
#
# ATM drivers
#
+CONFIG_ATM_DUMMY=m
CONFIG_ATM_TCP=m
CONFIG_ATM_LANAI=m
CONFIG_ATM_ENI=m
CONFIG_PPP_SYNC_TTY=m
CONFIG_PPP_DEFLATE=m
CONFIG_PPP_BSDCOMP=m
+CONFIG_PPP_MPPE=m
CONFIG_PPPOE=m
CONFIG_PPPOATM=m
CONFIG_SLIP=m
CONFIG_TOUCHSCREEN_MK712=m
CONFIG_INPUT_MISC=y
CONFIG_INPUT_PCSPKR=m
+CONFIG_INPUT_WISTRON_BTNS=m
CONFIG_INPUT_UINPUT=m
#
# PCMCIA character devices
#
CONFIG_SYNCLINK_CS=m
+CONFIG_CARDMAN_4000=m
+CONFIG_CARDMAN_4040=m
CONFIG_MWAVE=m
CONFIG_SCx200_GPIO=m
CONFIG_RAW_DRIVER=m
-# CONFIG_HPET is not set
CONFIG_MAX_RAW_DEVS=256
+# CONFIG_HPET is not set
CONFIG_HANGCHECK_TIMER=m
#
# TPM devices
#
# CONFIG_TCG_TPM is not set
+CONFIG_TELCLOCK=m
#
# I2C support
CONFIG_SENSORS_PCF8591=m
CONFIG_SENSORS_RTC8564=m
CONFIG_SENSORS_MAX6875=m
+CONFIG_RTC_X1205_I2C=m
# CONFIG_I2C_DEBUG_CORE is not set
# CONFIG_I2C_DEBUG_ALGO is not set
# CONFIG_I2C_DEBUG_BUS is not set
# Video Adapters
#
CONFIG_VIDEO_BT848=m
+# CONFIG_VIDEO_BT848_DVB is not set
CONFIG_VIDEO_SAA6588=m
CONFIG_VIDEO_BWQCAM=m
CONFIG_VIDEO_CQCAM=m
CONFIG_VIDEO_HEXIUM_GEMINI=m
CONFIG_VIDEO_CX88=m
# CONFIG_VIDEO_CX88_DVB is not set
+CONFIG_VIDEO_EM28XX=m
CONFIG_VIDEO_OVCAMCHIP=m
+CONFIG_VIDEO_AUDIO_DECODER=m
+CONFIG_VIDEO_DECODER=m
#
# Radio Adapters
# ATSC (North American/Korean Terresterial DTV) frontends
#
CONFIG_DVB_NXT2002=m
+CONFIG_DVB_NXT200X=m
CONFIG_DVB_OR51211=m
CONFIG_DVB_OR51132=m
CONFIG_DVB_BCM3510=m
CONFIG_FB_CFB_FILLRECT=m
CONFIG_FB_CFB_COPYAREA=m
CONFIG_FB_CFB_IMAGEBLIT=m
-CONFIG_FB_SOFT_CURSOR=m
# CONFIG_FB_MACMODES is not set
CONFIG_FB_MODE_HELPERS=y
CONFIG_FB_TILEBLITTING=y
CONFIG_VIDEO_SELECT=y
CONFIG_FB_HGA=m
# CONFIG_FB_HGA_ACCEL is not set
+CONFIG_FB_S1D13XXX=m
CONFIG_FB_NVIDIA=m
CONFIG_FB_NVIDIA_I2C=y
CONFIG_FB_RIVA=m
# CONFIG_FB_PM3 is not set
CONFIG_FB_GEODE=y
CONFIG_FB_GEODE_GX1=m
-CONFIG_FB_S1D13XXX=m
CONFIG_FB_VIRTUAL=m
#
CONFIG_VGA_CONSOLE=y
CONFIG_DUMMY_CONSOLE=y
CONFIG_FRAMEBUFFER_CONSOLE=m
+# CONFIG_FRAMEBUFFER_CONSOLE_ROTATION is not set
# CONFIG_FONTS is not set
CONFIG_FONT_8x8=y
CONFIG_FONT_8x16=y
# Advanced Linux Sound Architecture
#
CONFIG_SND=m
+CONFIG_SND_AC97_CODEC=m
+CONFIG_SND_AC97_BUS=m
CONFIG_SND_TIMER=m
CONFIG_SND_PCM=m
CONFIG_SND_HWDEP=m
CONFIG_SND_MTPAV=m
CONFIG_SND_SERIAL_U16550=m
CONFIG_SND_MPU401=m
-CONFIG_SND_AC97_CODEC=m
-CONFIG_SND_AC97_BUS=m
#
# PCI devices
# Open Sound System
#
CONFIG_SOUND_PRIME=m
-CONFIG_SOUND_BT878=m
-CONFIG_SOUND_CMPCI=m
-# CONFIG_SOUND_CMPCI_FM is not set
-# CONFIG_SOUND_CMPCI_MIDI is not set
-CONFIG_SOUND_CMPCI_JOYSTICK=y
-CONFIG_SOUND_EMU10K1=m
+# CONFIG_OBSOLETE_OSS_DRIVER is not set
CONFIG_SOUND_FUSION=m
-CONFIG_SOUND_CS4281=m
-CONFIG_SOUND_ES1370=m
-CONFIG_SOUND_ES1371=m
-CONFIG_SOUND_ESSSOLO1=m
-CONFIG_SOUND_MAESTRO=m
-CONFIG_SOUND_MAESTRO3=m
CONFIG_SOUND_ICH=m
-CONFIG_SOUND_SONICVIBES=m
CONFIG_SOUND_TRIDENT=m
# CONFIG_SOUND_MSNDCLAS is not set
# CONFIG_SOUND_MSNDPIN is not set
-CONFIG_SOUND_VIA82CXXX=m
CONFIG_SOUND_TVMIXER=m
-CONFIG_SOUND_ALI5455=m
-CONFIG_SOUND_FORTE=m
-CONFIG_SOUND_RME96XX=m
-CONFIG_SOUND_AD1980=m
#
# USB support
# USB Device Class drivers
#
# CONFIG_OBSOLETE_OSS_USB_DRIVER is not set
+CONFIG_USB_ACM=m
+CONFIG_USB_PRINTER=m
#
-# USB Bluetooth TTY can only be used with disabled Bluetooth subsystem
+# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
#
-CONFIG_USB_ACM=m
-CONFIG_USB_PRINTER=m
#
-# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' may also be needed; see USB_STORAGE Help for more information
+# may also be needed; see USB_STORAGE Help for more information
#
CONFIG_USB_STORAGE=m
# CONFIG_USB_STORAGE_DEBUG is not set
CONFIG_USB_STORAGE_SDDR09=y
CONFIG_USB_STORAGE_SDDR55=y
CONFIG_USB_STORAGE_JUMPSHOT=y
-CONFIG_USB_STORAGE_ONETOUCH=y
#
# USB Input Devices
CONFIG_USB_SERIAL=m
CONFIG_USB_SERIAL_GENERIC=y
CONFIG_USB_SERIAL_AIRPRIME=m
+CONFIG_USB_SERIAL_ANYDATA=m
CONFIG_USB_SERIAL_BELKIN=m
CONFIG_USB_SERIAL_WHITEHEAT=m
CONFIG_USB_SERIAL_DIGI_ACCELEPORT=m
CONFIG_FS_POSIX_ACL=y
CONFIG_XFS_FS=m
CONFIG_XFS_EXPORT=y
-CONFIG_XFS_QUOTA=m
+# CONFIG_XFS_QUOTA is not set
CONFIG_XFS_SECURITY=y
CONFIG_XFS_POSIX_ACL=y
CONFIG_XFS_RT=y
CONFIG_JFFS2_FS=m
CONFIG_JFFS2_FS_DEBUG=0
CONFIG_JFFS2_FS_WRITEBUFFER=y
+# CONFIG_JFFS2_SUMMARY is not set
# CONFIG_JFFS2_COMPRESSION_OPTIONS is not set
CONFIG_JFFS2_ZLIB=y
CONFIG_JFFS2_RTIME=y
CONFIG_NLS_KOI8_U=m
CONFIG_NLS_UTF8=m
+#
+# Instrumentation Support
+#
+# CONFIG_KPROBES is not set
+
#
# Kernel hacking
#
# CONFIG_DEBUG_BUGVERBOSE is not set
# CONFIG_DEBUG_INFO is not set
# CONFIG_DEBUG_FS is not set
+# CONFIG_DEBUG_VM is not set
# CONFIG_FRAME_POINTER is not set
+# CONFIG_RCU_TORTURE_TEST is not set
# CONFIG_EARLY_PRINTK is not set
# CONFIG_DEBUG_STACKOVERFLOW is not set
-# CONFIG_KPROBES is not set
# CONFIG_DEBUG_STACK_USAGE is not set
# CONFIG_DEBUG_PAGEALLOC is not set
# CONFIG_4KSTACKS is not set
#
# Automatically generated make config: don't edit
-# Linux kernel version: 2.6.14-xen
-# Tue Jan 31 18:19:07 2006
+# Linux kernel version: 2.6.15-xen
+# Wed Feb 1 15:51:35 2006
#
CONFIG_X86_64=y
CONFIG_64BIT=y
# CONFIG_IKCONFIG is not set
# CONFIG_CPUSETS is not set
CONFIG_INITRAMFS_SOURCE=""
+# CONFIG_CC_OPTIMIZE_FOR_SIZE is not set
# CONFIG_EMBEDDED is not set
CONFIG_KALLSYMS=y
# CONFIG_KALLSYMS_ALL is not set
CONFIG_KMOD=y
CONFIG_STOP_MACHINE=y
+#
+# Block layer
+#
+CONFIG_LBD=y
+
+#
+# IO Schedulers
+#
+CONFIG_IOSCHED_NOOP=y
+CONFIG_IOSCHED_AS=y
+CONFIG_IOSCHED_DEADLINE=y
+CONFIG_IOSCHED_CFQ=y
+CONFIG_DEFAULT_AS=y
+# CONFIG_DEFAULT_DEADLINE is not set
+# CONFIG_DEFAULT_CFQ is not set
+# CONFIG_DEFAULT_NOOP is not set
+CONFIG_DEFAULT_IOSCHED="anticipatory"
+
#
# Processor type and features
#
# CONFIG_PREEMPT_VOLUNTARY is not set
# CONFIG_PREEMPT is not set
CONFIG_PREEMPT_BKL=y
-# CONFIG_NUMA is not set
CONFIG_ARCH_FLATMEM_ENABLE=y
CONFIG_SELECT_MEMORY_MODEL=y
CONFIG_FLATMEM_MANUAL=y
CONFIG_FLATMEM=y
CONFIG_FLAT_NODE_MEM_MAP=y
# CONFIG_SPARSEMEM_STATIC is not set
+CONFIG_SPLIT_PTLOCK_CPUS=4096
CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID=y
CONFIG_NR_CPUS=8
# CONFIG_HOTPLUG_CPU is not set
CONFIG_NETFILTER=y
# CONFIG_NETFILTER_DEBUG is not set
CONFIG_BRIDGE_NETFILTER=y
+
+#
+# Core Netfilter Configuration
+#
CONFIG_NETFILTER_NETLINK=m
CONFIG_NETFILTER_NETLINK_QUEUE=m
CONFIG_NETFILTER_NETLINK_LOG=m
CONFIG_NET_DIVERT=y
# CONFIG_ECONET is not set
CONFIG_WAN_ROUTER=m
+
+#
+# QoS and/or fair queueing
+#
CONFIG_NET_SCHED=y
CONFIG_NET_SCH_CLK_JIFFIES=y
# CONFIG_NET_SCH_CLK_GETTIMEOFDAY is not set
# CONFIG_NET_SCH_CLK_CPU is not set
+
+#
+# Queueing/Scheduling
+#
CONFIG_NET_SCH_CBQ=m
CONFIG_NET_SCH_HTB=m
CONFIG_NET_SCH_HFSC=m
CONFIG_NET_SCH_DSMARK=m
CONFIG_NET_SCH_NETEM=m
CONFIG_NET_SCH_INGRESS=m
-CONFIG_NET_QOS=y
-CONFIG_NET_ESTIMATOR=y
+
+#
+# Classification
+#
CONFIG_NET_CLS=y
CONFIG_NET_CLS_BASIC=m
CONFIG_NET_CLS_TCINDEX=m
CONFIG_NET_CLS_FW=m
CONFIG_NET_CLS_U32=m
CONFIG_CLS_U32_PERF=y
-CONFIG_NET_CLS_IND=y
CONFIG_CLS_U32_MARK=y
CONFIG_NET_CLS_RSVP=m
CONFIG_NET_CLS_RSVP6=m
CONFIG_NET_EMATCH_TEXT=m
# CONFIG_NET_CLS_ACT is not set
CONFIG_NET_CLS_POLICE=y
+CONFIG_NET_CLS_IND=y
+CONFIG_NET_ESTIMATOR=y
#
# Network testing
CONFIG_BT_HCIUART=m
CONFIG_BT_HCIUART_H4=y
CONFIG_BT_HCIUART_BCSP=y
-CONFIG_BT_HCIUART_BCSP_TXCRC=y
CONFIG_BT_HCIBCM203X=m
CONFIG_BT_HCIBPA10X=m
CONFIG_BT_HCIBFUSB=m
CONFIG_NFTL=m
CONFIG_NFTL_RW=y
CONFIG_INFTL=m
+CONFIG_RFD_FTL=m
#
# RAM/ROM/Flash chip drivers
# CONFIG_MTD_NAND_DISKONCHIP is not set
# CONFIG_MTD_NAND_NANDSIM is not set
+#
+# OneNAND Flash Device Drivers
+#
+CONFIG_MTD_ONENAND=m
+# CONFIG_MTD_ONENAND_VERIFY_WRITE is not set
+
#
# Parallel port support
#
CONFIG_BLK_DEV_RAM_COUNT=16
CONFIG_BLK_DEV_RAM_SIZE=16384
CONFIG_BLK_DEV_INITRD=y
-CONFIG_LBD=y
CONFIG_CDROM_PKTCDVD=m
CONFIG_CDROM_PKTCDVD_BUFFERS=8
# CONFIG_CDROM_PKTCDVD_WCACHE is not set
-
-#
-# IO Schedulers
-#
-CONFIG_IOSCHED_NOOP=y
-CONFIG_IOSCHED_AS=y
-CONFIG_IOSCHED_DEADLINE=y
-CONFIG_IOSCHED_CFQ=y
CONFIG_ATA_OVER_ETH=m
#
#
# SCSI low-level drivers
#
+CONFIG_ISCSI_TCP=m
CONFIG_BLK_DEV_3W_XXXX_RAID=m
CONFIG_SCSI_3W_9XXX=m
CONFIG_SCSI_ACARD=m
CONFIG_SCSI_ATA_PIIX=y
CONFIG_SCSI_SATA_MV=m
CONFIG_SCSI_SATA_NV=m
-CONFIG_SCSI_SATA_PROMISE=m
+CONFIG_SCSI_PDC_ADMA=m
CONFIG_SCSI_SATA_QSTOR=m
+CONFIG_SCSI_SATA_PROMISE=m
CONFIG_SCSI_SATA_SX4=m
CONFIG_SCSI_SATA_SIL=m
+CONFIG_SCSI_SATA_SIL24=m
CONFIG_SCSI_SATA_SIS=m
CONFIG_SCSI_SATA_ULI=m
CONFIG_SCSI_SATA_VIA=m
CONFIG_SCSI_SATA_INTEL_COMBINED=y
CONFIG_SCSI_BUSLOGIC=m
# CONFIG_SCSI_OMIT_FLASHPOINT is not set
-# CONFIG_SCSI_CPQFCTS is not set
# CONFIG_SCSI_DMX3191D is not set
# CONFIG_SCSI_EATA is not set
# CONFIG_SCSI_EATA_PIO is not set
CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64
# CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set
# CONFIG_SCSI_IPR is not set
-# CONFIG_SCSI_QLOGIC_ISP is not set
# CONFIG_SCSI_QLOGIC_FC is not set
CONFIG_SCSI_QLOGIC_1280=m
-CONFIG_SCSI_QLOGIC_1280_1040=y
CONFIG_SCSI_QLA2XXX=y
CONFIG_SCSI_QLA21XX=m
CONFIG_SCSI_QLA22XX=m
# PHY device support
#
CONFIG_PHYLIB=m
-CONFIG_PHYCONTROL=y
#
# MII PHY device drivers
CONFIG_IXGB_NAPI=y
CONFIG_S2IO=m
CONFIG_S2IO_NAPI=y
-# CONFIG_2BUFF_MODE is not set
#
# Token Ring devices
#
# ATM drivers
#
+CONFIG_ATM_DUMMY=m
CONFIG_ATM_TCP=m
CONFIG_ATM_LANAI=m
CONFIG_ATM_ENI=m
CONFIG_PPP_SYNC_TTY=m
CONFIG_PPP_DEFLATE=m
# CONFIG_PPP_BSDCOMP is not set
+CONFIG_PPP_MPPE=m
CONFIG_PPPOE=m
CONFIG_PPPOATM=m
CONFIG_SLIP=m
#
# Serial drivers
#
-# CONFIG_SERIAL_8250 is not set
#
# Non-8250 serial port support
# TPM devices
#
# CONFIG_TCG_TPM is not set
+CONFIG_TELCLOCK=m
#
# I2C support
CONFIG_SENSORS_PCF8591=m
CONFIG_SENSORS_RTC8564=m
CONFIG_SENSORS_MAX6875=m
+CONFIG_RTC_X1205_I2C=m
# CONFIG_I2C_DEBUG_CORE is not set
# CONFIG_I2C_DEBUG_ALGO is not set
# CONFIG_I2C_DEBUG_BUS is not set
# Video Adapters
#
CONFIG_VIDEO_BT848=m
+# CONFIG_VIDEO_BT848_DVB is not set
CONFIG_VIDEO_SAA6588=m
CONFIG_VIDEO_BWQCAM=m
CONFIG_VIDEO_CQCAM=m
CONFIG_VIDEO_ZORAN_LML33R10=m
# CONFIG_VIDEO_ZR36120 is not set
CONFIG_VIDEO_SAA7134=m
+CONFIG_VIDEO_SAA7134_ALSA=m
CONFIG_VIDEO_SAA7134_DVB=m
+CONFIG_VIDEO_SAA7134_DVB_ALL_FRONTENDS=y
CONFIG_VIDEO_MXB=m
CONFIG_VIDEO_DPC=m
CONFIG_VIDEO_HEXIUM_ORION=m
CONFIG_VIDEO_HEXIUM_GEMINI=m
CONFIG_VIDEO_CX88=m
CONFIG_VIDEO_CX88_DVB=m
+CONFIG_VIDEO_CX88_DVB_ALL_FRONTENDS=y
+CONFIG_VIDEO_EM28XX=m
CONFIG_VIDEO_OVCAMCHIP=m
+CONFIG_VIDEO_AUDIO_DECODER=m
+CONFIG_VIDEO_DECODER=m
#
# Radio Adapters
# ATSC (North American/Korean Terresterial DTV) frontends
#
CONFIG_DVB_NXT2002=m
+CONFIG_DVB_NXT200X=m
CONFIG_DVB_OR51211=m
CONFIG_DVB_OR51132=m
CONFIG_DVB_BCM3510=m
CONFIG_FB_CFB_FILLRECT=y
CONFIG_FB_CFB_COPYAREA=y
CONFIG_FB_CFB_IMAGEBLIT=y
-CONFIG_FB_SOFT_CURSOR=y
# CONFIG_FB_MACMODES is not set
CONFIG_FB_MODE_HELPERS=y
CONFIG_FB_TILEBLITTING=y
CONFIG_FB_VESA=y
CONFIG_VIDEO_SELECT=y
# CONFIG_FB_HGA is not set
+# CONFIG_FB_S1D13XXX is not set
# CONFIG_FB_NVIDIA is not set
CONFIG_FB_RIVA=m
# CONFIG_FB_RIVA_I2C is not set
CONFIG_FB_TRIDENT_ACCEL=y
# CONFIG_FB_PM3 is not set
# CONFIG_FB_GEODE is not set
-# CONFIG_FB_S1D13XXX is not set
# CONFIG_FB_VIRTUAL is not set
#
CONFIG_VGA_CONSOLE=y
CONFIG_DUMMY_CONSOLE=y
CONFIG_FRAMEBUFFER_CONSOLE=y
+# CONFIG_FRAMEBUFFER_CONSOLE_ROTATION is not set
# CONFIG_FONTS is not set
CONFIG_FONT_8x8=y
CONFIG_FONT_8x16=y
# Advanced Linux Sound Architecture
#
CONFIG_SND=m
+CONFIG_SND_AC97_CODEC=m
+CONFIG_SND_AC97_BUS=m
CONFIG_SND_TIMER=m
CONFIG_SND_PCM=m
CONFIG_SND_HWDEP=m
CONFIG_SND_MTPAV=m
# CONFIG_SND_SERIAL_U16550 is not set
CONFIG_SND_MPU401=m
-CONFIG_SND_AC97_CODEC=m
-CONFIG_SND_AC97_BUS=m
#
# PCI devices
# USB Device Class drivers
#
# CONFIG_OBSOLETE_OSS_USB_DRIVER is not set
+CONFIG_USB_ACM=m
+CONFIG_USB_PRINTER=m
#
-# USB Bluetooth TTY can only be used with disabled Bluetooth subsystem
+# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support'
#
-CONFIG_USB_ACM=m
-CONFIG_USB_PRINTER=m
#
-# NOTE: USB_STORAGE enables SCSI, and 'SCSI disk support' may also be needed; see USB_STORAGE Help for more information
+# may also be needed; see USB_STORAGE Help for more information
#
CONFIG_USB_STORAGE=m
# CONFIG_USB_STORAGE_DEBUG is not set
CONFIG_USB_STORAGE_SDDR09=y
CONFIG_USB_STORAGE_SDDR55=y
CONFIG_USB_STORAGE_JUMPSHOT=y
-CONFIG_USB_STORAGE_ONETOUCH=y
#
# USB Input Devices
CONFIG_USB_SERIAL=m
CONFIG_USB_SERIAL_GENERIC=y
CONFIG_USB_SERIAL_AIRPRIME=m
+CONFIG_USB_SERIAL_ANYDATA=m
CONFIG_USB_SERIAL_BELKIN=m
CONFIG_USB_SERIAL_WHITEHEAT=m
CONFIG_USB_SERIAL_DIGI_ACCELEPORT=m
# CONFIG_INFINIBAND_MTHCA_DEBUG is not set
CONFIG_INFINIBAND_IPOIB=m
# CONFIG_INFINIBAND_IPOIB_DEBUG is not set
+CONFIG_INFINIBAND_SRP=m
#
# SN Devices
CONFIG_FS_POSIX_ACL=y
CONFIG_XFS_FS=m
CONFIG_XFS_EXPORT=y
-CONFIG_XFS_QUOTA=m
+# CONFIG_XFS_QUOTA is not set
CONFIG_XFS_SECURITY=y
CONFIG_XFS_POSIX_ACL=y
# CONFIG_XFS_RT is not set
CONFIG_JFFS2_FS=m
CONFIG_JFFS2_FS_DEBUG=0
CONFIG_JFFS2_FS_WRITEBUFFER=y
+# CONFIG_JFFS2_SUMMARY is not set
# CONFIG_JFFS2_COMPRESSION_OPTIONS is not set
CONFIG_JFFS2_ZLIB=y
CONFIG_JFFS2_RTIME=y
CONFIG_NLS_UTF8=m
#
-# Profiling support
+# Instrumentation Support
#
# CONFIG_PROFILING is not set
+# CONFIG_KPROBES is not set
#
# Kernel hacking
# CONFIG_DEBUG_SPINLOCK_SLEEP is not set
# CONFIG_DEBUG_KOBJECT is not set
# CONFIG_DEBUG_FS is not set
+# CONFIG_DEBUG_VM is not set
# CONFIG_FRAME_POINTER is not set
+# CONFIG_RCU_TORTURE_TEST is not set
# CONFIG_INIT_DEBUG is not set
-# CONFIG_KPROBES is not set
#
# Security options
OS = linux
LINUX_SERIES = 2.6
-LINUX_VER = 2.6.14
+LINUX_VER = 2.6.15
EXTRAVERSION ?= xen
mainmenu "Linux Kernel Configuration"
-config X86
+config X86_32
bool
default y
help
bool
default y
+config X86
+ bool
+ default y
+
config MMU
bool
default y
default y
depends on SMP && X86_ES7000 && MPENTIUMIII
-if !X86_ELAN
-
-choice
- prompt "Processor family"
- default M686
-
-config M386
- bool "386"
- ---help---
- This is the processor type of your CPU. This information is used for
- optimizing purposes. In order to compile a kernel that can run on
- all x86 CPU types (albeit not optimally fast), you can specify
- "386" here.
-
- The kernel will not necessarily run on earlier architectures than
- the one you have chosen, e.g. a Pentium optimized kernel will run on
- a PPro, but not necessarily on a i486.
-
- Here are the settings recommended for greatest speed:
- - "386" for the AMD/Cyrix/Intel 386DX/DXL/SL/SLC/SX, Cyrix/TI
- 486DLC/DLC2, UMC 486SX-S and NexGen Nx586. Only "386" kernels
- will run on a 386 class machine.
- - "486" for the AMD/Cyrix/IBM/Intel 486DX/DX2/DX4 or
- SL/SLC/SLC2/SLC3/SX/SX2 and UMC U5D or U5S.
- - "586" for generic Pentium CPUs lacking the TSC
- (time stamp counter) register.
- - "Pentium-Classic" for the Intel Pentium.
- - "Pentium-MMX" for the Intel Pentium MMX.
- - "Pentium-Pro" for the Intel Pentium Pro.
- - "Pentium-II" for the Intel Pentium II or pre-Coppermine Celeron.
- - "Pentium-III" for the Intel Pentium III or Coppermine Celeron.
- - "Pentium-4" for the Intel Pentium 4 or P4-based Celeron.
- - "K6" for the AMD K6, K6-II and K6-III (aka K6-3D).
- - "Athlon" for the AMD K7 family (Athlon/Duron/Thunderbird).
- - "Crusoe" for the Transmeta Crusoe series.
- - "Efficeon" for the Transmeta Efficeon series.
- - "Winchip-C6" for original IDT Winchip.
- - "Winchip-2" for IDT Winchip 2.
- - "Winchip-2A" for IDT Winchips with 3dNow! capabilities.
- - "GeodeGX1" for Geode GX1 (Cyrix MediaGX).
- - "CyrixIII/VIA C3" for VIA Cyrix III or VIA C3.
- - "VIA C3-2 for VIA C3-2 "Nehemiah" (model 9 and above).
-
- If you don't know what to do, choose "386".
-
-config M486
- bool "486"
- help
- Select this for a 486 series processor, either Intel or one of the
- compatible processors from AMD, Cyrix, IBM, or Intel. Includes DX,
- DX2, and DX4 variants; also SL/SLC/SLC2/SLC3/SX/SX2 and UMC U5D or
- U5S.
-
-config M586
- bool "586/K5/5x86/6x86/6x86MX"
- help
- Select this for an 586 or 686 series processor such as the AMD K5,
- the Cyrix 5x86, 6x86 and 6x86MX. This choice does not
- assume the RDTSC (Read Time Stamp Counter) instruction.
-
-config M586TSC
- bool "Pentium-Classic"
- help
- Select this for a Pentium Classic processor with the RDTSC (Read
- Time Stamp Counter) instruction for benchmarking.
-
-config M586MMX
- bool "Pentium-MMX"
- help
- Select this for a Pentium with the MMX graphics/multimedia
- extended instructions.
-
-config M686
- bool "Pentium-Pro"
- help
- Select this for Intel Pentium Pro chips. This enables the use of
- Pentium Pro extended instructions, and disables the init-time guard
- against the f00f bug found in earlier Pentiums.
-
-config MPENTIUMII
- bool "Pentium-II/Celeron(pre-Coppermine)"
- help
- Select this for Intel chips based on the Pentium-II and
- pre-Coppermine Celeron core. This option enables an unaligned
- copy optimization, compiles the kernel with optimization flags
- tailored for the chip, and applies any applicable Pentium Pro
- optimizations.
-
-config MPENTIUMIII
- bool "Pentium-III/Celeron(Coppermine)/Pentium-III Xeon"
- help
- Select this for Intel chips based on the Pentium-III and
- Celeron-Coppermine core. This option enables use of some
- extended prefetch instructions in addition to the Pentium II
- extensions.
-
-config MPENTIUMM
- bool "Pentium M"
- help
- Select this for Intel Pentium M (not Pentium-4 M)
- notebook chips.
-
-config MPENTIUM4
- bool "Pentium-4/Celeron(P4-based)/Pentium-4 M/Xeon"
- help
- Select this for Intel Pentium 4 chips. This includes the
- Pentium 4, P4-based Celeron and Xeon, and Pentium-4 M
- (not Pentium M) chips. This option enables compile flags
- optimized for the chip, uses the correct cache shift, and
- applies any applicable Pentium III optimizations.
-
-config MK6
- bool "K6/K6-II/K6-III"
- help
- Select this for an AMD K6-family processor. Enables use of
- some extended instructions, and passes appropriate optimization
- flags to GCC.
-
-config MK7
- bool "Athlon/Duron/K7"
- help
- Select this for an AMD Athlon K7-family processor. Enables use of
- some extended instructions, and passes appropriate optimization
- flags to GCC.
-
-config MK8
- bool "Opteron/Athlon64/Hammer/K8"
- help
- Select this for an AMD Opteron or Athlon64 Hammer-family processor. Enables
- use of some extended instructions, and passes appropriate optimization
- flags to GCC.
-
-config MCRUSOE
- bool "Crusoe"
- help
- Select this for a Transmeta Crusoe processor. Treats the processor
- like a 586 with TSC, and sets some GCC optimization flags (like a
- Pentium Pro with no alignment requirements).
-
-config MEFFICEON
- bool "Efficeon"
- help
- Select this for a Transmeta Efficeon processor.
-
-config MWINCHIPC6
- bool "Winchip-C6"
- help
- Select this for an IDT Winchip C6 chip. Linux and GCC
- treat this chip as a 586TSC with some extended instructions
- and alignment requirements.
-
-config MWINCHIP2
- bool "Winchip-2"
- help
- Select this for an IDT Winchip-2. Linux and GCC
- treat this chip as a 586TSC with some extended instructions
- and alignment requirements.
-
-config MWINCHIP3D
- bool "Winchip-2A/Winchip-3"
- help
- Select this for an IDT Winchip-2A or 3. Linux and GCC
- treat this chip as a 586TSC with some extended instructions
- and alignment reqirements. Also enable out of order memory
- stores for this CPU, which can increase performance of some
- operations.
-
-config MGEODEGX1
- bool "GeodeGX1"
- help
- Select this for a Geode GX1 (Cyrix MediaGX) chip.
-
-config MCYRIXIII
- bool "CyrixIII/VIA-C3"
- help
- Select this for a Cyrix III or C3 chip. Presently Linux and GCC
- treat this chip as a generic 586. Whilst the CPU is 686 class,
- it lacks the cmov extension which gcc assumes is present when
- generating 686 code.
- Note that Nehemiah (Model 9) and above will not boot with this
- kernel due to them lacking the 3DNow! instructions used in earlier
- incarnations of the CPU.
-
-config MVIAC3_2
- bool "VIA C3-2 (Nehemiah)"
- help
- Select this for a VIA C3 "Nehemiah". Selecting this enables usage
- of SSE and tells gcc to treat the CPU as a 686.
- Note, this kernel will not boot on older (pre model 9) C3s.
-
-endchoice
-
-config X86_GENERIC
- bool "Generic x86 support"
- help
- Instead of just including optimizations for the selected
- x86 variant (e.g. PII, Crusoe or Athlon), include some more
- generic optimizations as well. This will make the kernel
- perform better on x86 CPUs other than that selected.
-
- This is really intended for distributors who need more
- generic optimizations.
-
-endif
-
-#
-# Define implied options from the CPU selection here
-#
-config X86_CMPXCHG
- bool
- depends on !M386
- default y
-
-config X86_XADD
- bool
- depends on !M386
- default y
-
-config X86_L1_CACHE_SHIFT
- int
- default "7" if MPENTIUM4 || X86_GENERIC
- default "4" if X86_ELAN || M486 || M386
- default "5" if MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODEGX1
- default "6" if MK7 || MK8 || MPENTIUMM
-
-config RWSEM_GENERIC_SPINLOCK
- bool
- depends on M386
- default y
-
-config RWSEM_XCHGADD_ALGORITHM
- bool
- depends on !M386
- default y
-
-config GENERIC_CALIBRATE_DELAY
- bool
- default y
-
-config X86_PPRO_FENCE
- bool
- depends on M686 || M586MMX || M586TSC || M586 || M486 || M386 || MGEODEGX1
- default y
-
-config X86_F00F_BUG
- bool
- depends on M586MMX || M586TSC || M586 || M486 || M386
- default y
-
-config X86_WP_WORKS_OK
- bool
- depends on !M386
- default y
-
-config X86_INVLPG
- bool
- depends on !M386
- default y
-
-config X86_BSWAP
- bool
- depends on !M386
- default y
-
-config X86_POPAD_OK
- bool
- depends on !M386
- default y
-
-config X86_ALIGNMENT_16
- bool
- depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || X86_ELAN || MK6 || M586MMX || M586TSC || M586 || M486 || MVIAC3_2 || MGEODEGX1
- default y
-
-config X86_GOOD_APIC
- bool
- depends on MK7 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || MK8 || MEFFICEON
- default y
-
-config X86_INTEL_USERCOPY
- bool
- depends on MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M586MMX || X86_GENERIC || MK8 || MK7 || MEFFICEON
- default y
-
-config X86_USE_PPRO_CHECKSUM
- bool
- depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MEFFICEON
- default y
-
-config X86_USE_3DNOW
- bool
- depends on MCYRIXIII || MK7
- default y
-
-config X86_OOSTORE
- bool
- depends on (MWINCHIP3D || MWINCHIP2 || MWINCHIPC6) && MTRR
- default y
+source "arch/i386/Kconfig.cpu"
config HPET_TIMER
bool "HPET Timer Support"
config APM
tristate "APM (Advanced Power Management) BIOS support"
- depends on PM
+ depends on PM && PM_LEGACY
---help---
APM is a BIOS specification for saving power using several different
techniques. This is mostly useful for battery powered laptops with
source "fs/Kconfig"
+menu "Instrumentation Support"
+ depends on EXPERIMENTAL
+
if !X86_XEN
source "arch/i386/oprofile/Kconfig"
endif
+config KPROBES
+ bool "Kprobes (EXPERIMENTAL)"
+ help
+ Kprobes allows you to trap at almost any kernel address and
+ execute a callback function. register_kprobe() establishes
+ a probepoint and specifies the callback. Kprobes is useful
+ for kernel debugging, non-intrusive instrumentation and testing.
+ If in doubt, say "N".
+endmenu
+
source "arch/i386/Kconfig.debug"
source "security/Kconfig"
bool
depends on X86_SMP || (X86_VOYAGER && SMP)
default y
-
-config PC
- bool
- depends on X86 && !EMBEDDED
- default y
# prevent gcc from keeping the stack 16 byte aligned
CFLAGS += $(call cc-option,-mpreferred-stack-boundary=2)
-align := $(cc-option-align)
-cflags-$(CONFIG_M386) += -march=i386
-cflags-$(CONFIG_M486) += -march=i486
-cflags-$(CONFIG_M586) += -march=i586
-cflags-$(CONFIG_M586TSC) += -march=i586
-cflags-$(CONFIG_M586MMX) += $(call cc-option,-march=pentium-mmx,-march=i586)
-cflags-$(CONFIG_M686) += -march=i686
-cflags-$(CONFIG_MPENTIUMII) += -march=i686 $(call cc-option,-mtune=pentium2)
-cflags-$(CONFIG_MPENTIUMIII) += -march=i686 $(call cc-option,-mtune=pentium3)
-cflags-$(CONFIG_MPENTIUMM) += -march=i686 $(call cc-option,-mtune=pentium3)
-cflags-$(CONFIG_MPENTIUM4) += -march=i686 $(call cc-option,-mtune=pentium4)
-cflags-$(CONFIG_MK6) += -march=k6
-# Please note, that patches that add -march=athlon-xp and friends are pointless.
-# They make zero difference whatsosever to performance at this time.
-cflags-$(CONFIG_MK7) += $(call cc-option,-march=athlon,-march=i686 $(align)-functions=4)
-cflags-$(CONFIG_MK8) += $(call cc-option,-march=k8,$(call cc-option,-march=athlon,-march=i686 $(align)-functions=4))
-cflags-$(CONFIG_MCRUSOE) += -march=i686 $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0
-cflags-$(CONFIG_MEFFICEON) += -march=i686 $(call cc-option,-mtune=pentium3) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0
-cflags-$(CONFIG_MWINCHIPC6) += $(call cc-option,-march=winchip-c6,-march=i586)
-cflags-$(CONFIG_MWINCHIP2) += $(call cc-option,-march=winchip2,-march=i586)
-cflags-$(CONFIG_MWINCHIP3D) += $(call cc-option,-march=winchip2,-march=i586)
-cflags-$(CONFIG_MCYRIXIII) += $(call cc-option,-march=c3,-march=i486) $(align)-functions=0 $(align)-jumps=0 $(align)-loops=0
-cflags-$(CONFIG_MVIAC3_2) += $(call cc-option,-march=c3-2,-march=i686)
-
-# AMD Elan support
-cflags-$(CONFIG_X86_ELAN) += -march=i486
-
-# Geode GX1 support
-cflags-$(CONFIG_MGEODEGX1) += $(call cc-option,-march=pentium-mmx,-march=i486)
+# CPU-specific tuning. Anything which can be shared with UML should go here.
+include $(srctree)/arch/i386/Makefile.cpu
# -mregparm=3 works ok on gcc-3.0 and later
#
include $(srctree)/scripts/Makefile.xen
obj-y += fixup.o
+microcode-$(subst m,y,$(CONFIG_MICROCODE)) := microcode-xen.o
n-obj-xen := i8259.o doublefault.o timers/ reboot.o smpboot.o trampoline.o
obj-y := $(call filterxen, $(obj-y), $(n-obj-xen))
#include <asm/apic.h>
#include <asm/io.h>
#include <asm/mpspec.h>
-#ifdef CONFIG_XEN
-#include <asm/fixmap.h>
-#endif
#ifdef CONFIG_X86_64
-static inline void acpi_madt_oem_check(char *oem_id, char *oem_table_id)
-{
-}
extern void __init clustered_apic_check(void);
-static inline int ioapic_setup_disabled(void)
-{
- return 0;
-}
+extern int gsi_irq_sharing(int gsi);
#include <asm/proto.h>
+static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id) { return 0; }
+
+
#else /* X86 */
#ifdef CONFIG_X86_LOCAL_APIC
#include <mach_mpparse.h>
#endif /* CONFIG_X86_LOCAL_APIC */
+static inline int gsi_irq_sharing(int gsi) { return gsi; }
+
#endif /* X86 */
#define BAD_MADT_ENTRY(entry, end) ( \
int idx;
#ifndef CONFIG_XEN
- if (phys + size < 8 * 1024 * 1024)
+ if (phys + size < 8 * 1024 * 1024)
return __va(phys);
#endif
acpi_table_print_madt_entry(header);
- /* no utility in registering a disabled processor */
- if (processor->flags.enabled == 0)
- return 0;
+ /* Register even disabled CPUs for cpu hotplug */
x86_acpiid_to_apicid[processor->acpi_id] = processor->id;
*irq = IO_APIC_VECTOR(gsi);
else
#endif
- *irq = gsi;
+ *irq = gsi_irq_sharing(gsi);
return 0;
}
EXPORT_SYMBOL(acpi_unregister_ioapic);
static unsigned long __init
-acpi_scan_rsdp (unsigned long start, unsigned long length)
+acpi_scan_rsdp(unsigned long start, unsigned long length)
{
unsigned long offset = 0;
unsigned long sig_len = sizeof("RSD PTR ") - 1;
return 0;
pmtmr_ioport = fadt->xpm_tmr_blk.address;
+ /*
+ * "X" fields are optional extensions to the original V1.0
+ * fields, so we must selectively expand V1.0 fields if the
+ * corresponding X field is zero.
+ */
+ if (!pmtmr_ioport)
+ pmtmr_ioport = fadt->V1_pm_tmr_blk;
} else {
/* FADT rev. 1 */
pmtmr_ioport = fadt->V1_pm_tmr_blk;
+++ /dev/null
-/* -*- linux-c -*-
- * APM BIOS driver for Linux
- * Copyright 1994-2001 Stephen Rothwell (sfr@canb.auug.org.au)
- *
- * Initial development of this driver was funded by NEC Australia P/L
- * and NEC Corporation
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2, or (at your option) any
- * later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * October 1995, Rik Faith (faith@cs.unc.edu):
- * Minor enhancements and updates (to the patch set) for 1.3.x
- * Documentation
- * January 1996, Rik Faith (faith@cs.unc.edu):
- * Make /proc/apm easy to format (bump driver version)
- * March 1996, Rik Faith (faith@cs.unc.edu):
- * Prohibit APM BIOS calls unless apm_enabled.
- * (Thanks to Ulrich Windl <Ulrich.Windl@rz.uni-regensburg.de>)
- * April 1996, Stephen Rothwell (sfr@canb.auug.org.au)
- * Version 1.0 and 1.1
- * May 1996, Version 1.2
- * Feb 1998, Version 1.3
- * Feb 1998, Version 1.4
- * Aug 1998, Version 1.5
- * Sep 1998, Version 1.6
- * Nov 1998, Version 1.7
- * Jan 1999, Version 1.8
- * Jan 1999, Version 1.9
- * Oct 1999, Version 1.10
- * Nov 1999, Version 1.11
- * Jan 2000, Version 1.12
- * Feb 2000, Version 1.13
- * Nov 2000, Version 1.14
- * Oct 2001, Version 1.15
- * Jan 2002, Version 1.16
- * Oct 2002, Version 1.16ac
- *
- * History:
- * 0.6b: first version in official kernel, Linux 1.3.46
- * 0.7: changed /proc/apm format, Linux 1.3.58
- * 0.8: fixed gcc 2.7.[12] compilation problems, Linux 1.3.59
- * 0.9: only call bios if bios is present, Linux 1.3.72
- * 1.0: use fixed device number, consolidate /proc/apm into this file,
- * Linux 1.3.85
- * 1.1: support user-space standby and suspend, power off after system
- * halted, Linux 1.3.98
- * 1.2: When resetting RTC after resume, take care so that the time
- * is only incorrect by 30-60mS (vs. 1S previously) (Gabor J. Toth
- * <jtoth@princeton.edu>); improve interaction between
- * screen-blanking and gpm (Stephen Rothwell); Linux 1.99.4
- * 1.2a:Simple change to stop mysterious bug reports with SMP also added
- * levels to the printk calls. APM is not defined for SMP machines.
- * The new replacment for it is, but Linux doesn't yet support this.
- * Alan Cox Linux 2.1.55
- * 1.3: Set up a valid data descriptor 0x40 for buggy BIOS's
- * 1.4: Upgraded to support APM 1.2. Integrated ThinkPad suspend patch by
- * Dean Gaudet <dgaudet@arctic.org>.
- * C. Scott Ananian <cananian@alumni.princeton.edu> Linux 2.1.87
- * 1.5: Fix segment register reloading (in case of bad segments saved
- * across BIOS call).
- * Stephen Rothwell
- * 1.6: Cope with complier/assembler differences.
- * Only try to turn off the first display device.
- * Fix OOPS at power off with no APM BIOS by Jan Echternach
- * <echter@informatik.uni-rostock.de>
- * Stephen Rothwell
- * 1.7: Modify driver's cached copy of the disabled/disengaged flags
- * to reflect current state of APM BIOS.
- * Chris Rankin <rankinc@bellsouth.net>
- * Reset interrupt 0 timer to 100Hz after suspend
- * Chad Miller <cmiller@surfsouth.com>
- * Add CONFIG_APM_IGNORE_SUSPEND_BOUNCE
- * Richard Gooch <rgooch@atnf.csiro.au>
- * Allow boot time disabling of APM
- * Make boot messages far less verbose by default
- * Make asm safer
- * Stephen Rothwell
- * 1.8: Add CONFIG_APM_RTC_IS_GMT
- * Richard Gooch <rgooch@atnf.csiro.au>
- * change APM_NOINTS to CONFIG_APM_ALLOW_INTS
- * remove dependency on CONFIG_PROC_FS
- * Stephen Rothwell
- * 1.9: Fix small typo. <laslo@wodip.opole.pl>
- * Try to cope with BIOS's that need to have all display
- * devices blanked and not just the first one.
- * Ross Paterson <ross@soi.city.ac.uk>
- * Fix segment limit setting it has always been wrong as
- * the segments needed to have byte granularity.
- * Mark a few things __init.
- * Add hack to allow power off of SMP systems by popular request.
- * Use CONFIG_SMP instead of __SMP__
- * Ignore BOUNCES for three seconds.
- * Stephen Rothwell
- * 1.10: Fix for Thinkpad return code.
- * Merge 2.2 and 2.3 drivers.
- * Remove APM dependencies in arch/i386/kernel/process.c
- * Remove APM dependencies in drivers/char/sysrq.c
- * Reset time across standby.
- * Allow more inititialisation on SMP.
- * Remove CONFIG_APM_POWER_OFF and make it boot time
- * configurable (default on).
- * Make debug only a boot time parameter (remove APM_DEBUG).
- * Try to blank all devices on any error.
- * 1.11: Remove APM dependencies in drivers/char/console.c
- * Check nr_running to detect if we are idle (from
- * Borislav Deianov <borislav@lix.polytechnique.fr>)
- * Fix for bioses that don't zero the top part of the
- * entrypoint offset (Mario Sitta <sitta@al.unipmn.it>)
- * (reported by Panos Katsaloulis <teras@writeme.com>).
- * Real mode power off patch (Walter Hofmann
- * <Walter.Hofmann@physik.stud.uni-erlangen.de>).
- * 1.12: Remove CONFIG_SMP as the compiler will optimize
- * the code away anyway (smp_num_cpus == 1 in UP)
- * noted by Artur Skawina <skawina@geocities.com>.
- * Make power off under SMP work again.
- * Fix thinko with initial engaging of BIOS.
- * Make sure power off only happens on CPU 0
- * (Paul "Rusty" Russell <rusty@rustcorp.com.au>).
- * Do error notification to user mode if BIOS calls fail.
- * Move entrypoint offset fix to ...boot/setup.S
- * where it belongs (Cosmos <gis88564@cis.nctu.edu.tw>).
- * Remove smp-power-off. SMP users must now specify
- * "apm=power-off" on the kernel command line. Suggested
- * by Jim Avera <jima@hal.com>, modified by Alan Cox
- * <alan@lxorguk.ukuu.org.uk>.
- * Register the /proc/apm entry even on SMP so that
- * scripts that check for it before doing power off
- * work (Jim Avera <jima@hal.com>).
- * 1.13: Changes for new pm_ interfaces (Andy Henroid
- * <andy_henroid@yahoo.com>).
- * Modularize the code.
- * Fix the Thinkpad (again) :-( (CONFIG_APM_IGNORE_MULTIPLE_SUSPENDS
- * is now the way life works).
- * Fix thinko in suspend() (wrong return).
- * Notify drivers on critical suspend.
- * Make kapmd absorb more idle time (Pavel Machek <pavel@suse.cz>
- * modified by sfr).
- * Disable interrupts while we are suspended (Andy Henroid
- * <andy_henroid@yahoo.com> fixed by sfr).
- * Make power off work on SMP again (Tony Hoyle
- * <tmh@magenta-logic.com> and <zlatko@iskon.hr>) modified by sfr.
- * Remove CONFIG_APM_SUSPEND_BOUNCE. The bounce ignore
- * interval is now configurable.
- * 1.14: Make connection version persist across module unload/load.
- * Enable and engage power management earlier.
- * Disengage power management on module unload.
- * Changed to use the sysrq-register hack for registering the
- * power off function called by magic sysrq based upon discussions
- * in irc://irc.openprojects.net/#kernelnewbies
- * (Crutcher Dunnavant <crutcher+kernel@datastacks.com>).
- * Make CONFIG_APM_REAL_MODE_POWER_OFF run time configurable.
- * (Arjan van de Ven <arjanv@redhat.com>) modified by sfr.
- * Work around byte swap bug in one of the Vaio's BIOS's
- * (Marc Boucher <marc@mbsi.ca>).
- * Exposed the disable flag to dmi so that we can handle known
- * broken APM (Alan Cox <alan@redhat.com>).
- * 1.14ac: If the BIOS says "I slowed the CPU down" then don't spin
- * calling it - instead idle. (Alan Cox <alan@redhat.com>)
- * If an APM idle fails log it and idle sensibly
- * 1.15: Don't queue events to clients who open the device O_WRONLY.
- * Don't expect replies from clients who open the device O_RDONLY.
- * (Idea from Thomas Hood)
- * Minor waitqueue cleanups. (John Fremlin <chief@bandits.org>)
- * 1.16: Fix idle calling. (Andreas Steinmetz <ast@domdv.de> et al.)
- * Notify listeners of standby or suspend events before notifying
- * drivers. Return EBUSY to ioctl() if suspend is rejected.
- * (Russell King <rmk@arm.linux.org.uk> and Thomas Hood)
- * Ignore first resume after we generate our own resume event
- * after a suspend (Thomas Hood)
- * Daemonize now gets rid of our controlling terminal (sfr).
- * CONFIG_APM_CPU_IDLE now just affects the default value of
- * idle_threshold (sfr).
- * Change name of kernel apm daemon (as it no longer idles) (sfr).
- * 1.16ac: Fix up SMP support somewhat. You can now force SMP on and we
- * make _all_ APM calls on the CPU#0. Fix unsafe sign bug.
- * TODO: determine if its "boot CPU" or "CPU0" we want to lock to.
- *
- * APM 1.1 Reference:
- *
- * Intel Corporation, Microsoft Corporation. Advanced Power Management
- * (APM) BIOS Interface Specification, Revision 1.1, September 1993.
- * Intel Order Number 241704-001. Microsoft Part Number 781-110-X01.
- *
- * [This document is available free from Intel by calling 800.628.8686 (fax
- * 916.356.6100) or 800.548.4725; or via anonymous ftp from
- * ftp://ftp.intel.com/pub/IAL/software_specs/apmv11.doc. It is also
- * available from Microsoft by calling 206.882.8080.]
- *
- * APM 1.2 Reference:
- * Intel Corporation, Microsoft Corporation. Advanced Power Management
- * (APM) BIOS Interface Specification, Revision 1.2, February 1996.
- *
- * [This document is available from Microsoft at:
- * http://www.microsoft.com/hwdev/busbios/amp_12.htm]
- */
-
-#include <linux/config.h>
-#include <linux/module.h>
-
-#include <linux/poll.h>
-#include <linux/types.h>
-#include <linux/stddef.h>
-#include <linux/timer.h>
-#include <linux/fcntl.h>
-#include <linux/slab.h>
-#include <linux/stat.h>
-#include <linux/proc_fs.h>
-#include <linux/miscdevice.h>
-#include <linux/apm_bios.h>
-#include <linux/init.h>
-#include <linux/time.h>
-#include <linux/sched.h>
-#include <linux/pm.h>
-#include <linux/device.h>
-#include <linux/kernel.h>
-#include <linux/smp.h>
-#include <linux/smp_lock.h>
-#include <linux/dmi.h>
-#include <linux/suspend.h>
-
-#include <asm/system.h>
-#include <asm/uaccess.h>
-#include <asm/desc.h>
-#include <asm/i8253.h>
-
-#include "io_ports.h"
-
-extern unsigned long get_cmos_time(void);
-extern void machine_real_restart(unsigned char *, int);
-
-#if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT)
-extern int (*console_blank_hook)(int);
-#endif
-
-/*
- * The apm_bios device is one of the misc char devices.
- * This is its minor number.
- */
-#define APM_MINOR_DEV 134
-
-/*
- * See Documentation/Config.help for the configuration options.
- *
- * Various options can be changed at boot time as follows:
- * (We allow underscores for compatibility with the modules code)
- * apm=on/off enable/disable APM
- * [no-]allow[-_]ints allow interrupts during BIOS calls
- * [no-]broken[-_]psr BIOS has a broken GetPowerStatus call
- * [no-]realmode[-_]power[-_]off switch to real mode before
- * powering off
- * [no-]debug log some debugging messages
- * [no-]power[-_]off power off on shutdown
- * [no-]smp Use apm even on an SMP box
- * bounce[-_]interval=<n> number of ticks to ignore suspend
- * bounces
- * idle[-_]threshold=<n> System idle percentage above which to
- * make APM BIOS idle calls. Set it to
- * 100 to disable.
- * idle[-_]period=<n> Period (in 1/100s of a second) over
- * which the idle percentage is
- * calculated.
- */
-
-/* KNOWN PROBLEM MACHINES:
- *
- * U: TI 4000M TravelMate: BIOS is *NOT* APM compliant
- * [Confirmed by TI representative]
- * ?: ACER 486DX4/75: uses dseg 0040, in violation of APM specification
- * [Confirmed by BIOS disassembly]
- * [This may work now ...]
- * P: Toshiba 1950S: battery life information only gets updated after resume
- * P: Midwest Micro Soundbook Elite DX2/66 monochrome: screen blanking
- * broken in BIOS [Reported by Garst R. Reese <reese@isn.net>]
- * ?: AcerNote-950: oops on reading /proc/apm - workaround is a WIP
- * Neale Banks <neale@lowendale.com.au> December 2000
- *
- * Legend: U = unusable with APM patches
- * P = partially usable with APM patches
- */
-
-/*
- * Define as 1 to make the driver always call the APM BIOS busy
- * routine even if the clock was not reported as slowed by the
- * idle routine. Otherwise, define as 0.
- */
-#define ALWAYS_CALL_BUSY 1
-
-/*
- * Define to make the APM BIOS calls zero all data segment registers (so
- * that an incorrect BIOS implementation will cause a kernel panic if it
- * tries to write to arbitrary memory).
- */
-#define APM_ZERO_SEGS
-
-#include "apm.h"
-
-/*
- * Define to make all _set_limit calls use 64k limits. The APM 1.1 BIOS is
- * supposed to provide limit information that it recognizes. Many machines
- * do this correctly, but many others do not restrict themselves to their
- * claimed limit. When this happens, they will cause a segmentation
- * violation in the kernel at boot time. Most BIOS's, however, will
- * respect a 64k limit, so we use that. If you want to be pedantic and
- * hold your BIOS to its claims, then undefine this.
- */
-#define APM_RELAX_SEGMENTS
-
-/*
- * Define to re-initialize the interrupt 0 timer to 100 Hz after a suspend.
- * This patched by Chad Miller <cmiller@surfsouth.com>, original code by
- * David Chen <chen@ctpa04.mit.edu>
- */
-#undef INIT_TIMER_AFTER_SUSPEND
-
-#ifdef INIT_TIMER_AFTER_SUSPEND
-#include <linux/timex.h>
-#include <asm/io.h>
-#include <linux/delay.h>
-#endif
-
-/*
- * Need to poll the APM BIOS every second
- */
-#define APM_CHECK_TIMEOUT (HZ)
-
-/*
- * Ignore suspend events for this amount of time after a resume
- */
-#define DEFAULT_BOUNCE_INTERVAL (3 * HZ)
-
-/*
- * Maximum number of events stored
- */
-#define APM_MAX_EVENTS 20
-
-/*
- * The per-file APM data
- */
-struct apm_user {
- int magic;
- struct apm_user * next;
- unsigned int suser: 1;
- unsigned int writer: 1;
- unsigned int reader: 1;
- unsigned int suspend_wait: 1;
- int suspend_result;
- int suspends_pending;
- int standbys_pending;
- int suspends_read;
- int standbys_read;
- int event_head;
- int event_tail;
- apm_event_t events[APM_MAX_EVENTS];
-};
-
-/*
- * The magic number in apm_user
- */
-#define APM_BIOS_MAGIC 0x4101
-
-/*
- * idle percentage above which bios idle calls are done
- */
-#ifdef CONFIG_APM_CPU_IDLE
-#define DEFAULT_IDLE_THRESHOLD 95
-#else
-#define DEFAULT_IDLE_THRESHOLD 100
-#endif
-#define DEFAULT_IDLE_PERIOD (100 / 3)
-
-/*
- * Local variables
- */
-static struct {
- unsigned long offset;
- unsigned short segment;
-} apm_bios_entry;
-static int clock_slowed;
-static int idle_threshold = DEFAULT_IDLE_THRESHOLD;
-static int idle_period = DEFAULT_IDLE_PERIOD;
-static int set_pm_idle;
-static int suspends_pending;
-static int standbys_pending;
-static int ignore_sys_suspend;
-static int ignore_normal_resume;
-static int bounce_interval = DEFAULT_BOUNCE_INTERVAL;
-
-#ifdef CONFIG_APM_RTC_IS_GMT
-# define clock_cmos_diff 0
-# define got_clock_diff 1
-#else
-static long clock_cmos_diff;
-static int got_clock_diff;
-#endif
-static int debug;
-static int smp;
-static int apm_disabled = -1;
-#ifdef CONFIG_SMP
-static int power_off;
-#else
-static int power_off = 1;
-#endif
-#ifdef CONFIG_APM_REAL_MODE_POWER_OFF
-static int realmode_power_off = 1;
-#else
-static int realmode_power_off;
-#endif
-static int exit_kapmd;
-static int kapmd_running;
-#ifdef CONFIG_APM_ALLOW_INTS
-static int allow_ints = 1;
-#else
-static int allow_ints;
-#endif
-static int broken_psr;
-
-static DECLARE_WAIT_QUEUE_HEAD(apm_waitqueue);
-static DECLARE_WAIT_QUEUE_HEAD(apm_suspend_waitqueue);
-static struct apm_user * user_list;
-static DEFINE_SPINLOCK(user_list_lock);
-static struct desc_struct bad_bios_desc = { 0, 0x00409200 };
-
-static char driver_version[] = "1.16ac"; /* no spaces */
-
-/*
- * APM event names taken from the APM 1.2 specification. These are
- * the message codes that the BIOS uses to tell us about events
- */
-static char * apm_event_name[] = {
- "system standby",
- "system suspend",
- "normal resume",
- "critical resume",
- "low battery",
- "power status change",
- "update time",
- "critical suspend",
- "user standby",
- "user suspend",
- "system standby resume",
- "capabilities change"
-};
-#define NR_APM_EVENT_NAME \
- (sizeof(apm_event_name) / sizeof(apm_event_name[0]))
-
-typedef struct lookup_t {
- int key;
- char * msg;
-} lookup_t;
-
-/*
- * The BIOS returns a set of standard error codes in AX when the
- * carry flag is set.
- */
-
-static const lookup_t error_table[] = {
-/* N/A { APM_SUCCESS, "Operation succeeded" }, */
- { APM_DISABLED, "Power management disabled" },
- { APM_CONNECTED, "Real mode interface already connected" },
- { APM_NOT_CONNECTED, "Interface not connected" },
- { APM_16_CONNECTED, "16 bit interface already connected" },
-/* N/A { APM_16_UNSUPPORTED, "16 bit interface not supported" }, */
- { APM_32_CONNECTED, "32 bit interface already connected" },
- { APM_32_UNSUPPORTED, "32 bit interface not supported" },
- { APM_BAD_DEVICE, "Unrecognized device ID" },
- { APM_BAD_PARAM, "Parameter out of range" },
- { APM_NOT_ENGAGED, "Interface not engaged" },
- { APM_BAD_FUNCTION, "Function not supported" },
- { APM_RESUME_DISABLED, "Resume timer disabled" },
- { APM_BAD_STATE, "Unable to enter requested state" },
-/* N/A { APM_NO_EVENTS, "No events pending" }, */
- { APM_NO_ERROR, "BIOS did not set a return code" },
- { APM_NOT_PRESENT, "No APM present" }
-};
-#define ERROR_COUNT (sizeof(error_table)/sizeof(lookup_t))
-
-/**
- * apm_error - display an APM error
- * @str: information string
- * @err: APM BIOS return code
- *
- * Write a meaningful log entry to the kernel log in the event of
- * an APM error.
- */
-
-static void apm_error(char *str, int err)
-{
- int i;
-
- for (i = 0; i < ERROR_COUNT; i++)
- if (error_table[i].key == err) break;
- if (i < ERROR_COUNT)
- printk(KERN_NOTICE "apm: %s: %s\n", str, error_table[i].msg);
- else
- printk(KERN_NOTICE "apm: %s: unknown error code %#2.2x\n",
- str, err);
-}
-
-/*
- * Lock APM functionality to physical CPU 0
- */
-
-#ifdef CONFIG_SMP
-
-static cpumask_t apm_save_cpus(void)
-{
- cpumask_t x = current->cpus_allowed;
- /* Some bioses don't like being called from CPU != 0 */
- set_cpus_allowed(current, cpumask_of_cpu(0));
- BUG_ON(smp_processor_id() != 0);
- return x;
-}
-
-static inline void apm_restore_cpus(cpumask_t mask)
-{
- set_cpus_allowed(current, mask);
-}
-
-#else
-
-/*
- * No CPU lockdown needed on a uniprocessor
- */
-
-#define apm_save_cpus() (current->cpus_allowed)
-#define apm_restore_cpus(x) (void)(x)
-
-#endif
-
-/*
- * These are the actual BIOS calls. Depending on APM_ZERO_SEGS and
- * apm_info.allow_ints, we are being really paranoid here! Not only
- * are interrupts disabled, but all the segment registers (except SS)
- * are saved and zeroed this means that if the BIOS tries to reference
- * any data without explicitly loading the segment registers, the kernel
- * will fault immediately rather than have some unforeseen circumstances
- * for the rest of the kernel. And it will be very obvious! :-) Doing
- * this depends on CS referring to the same physical memory as DS so that
- * DS can be zeroed before the call. Unfortunately, we can't do anything
- * about the stack segment/pointer. Also, we tell the compiler that
- * everything could change.
- *
- * Also, we KNOW that for the non error case of apm_bios_call, there
- * is no useful data returned in the low order 8 bits of eax.
- */
-#define APM_DO_CLI \
- if (apm_info.allow_ints) \
- local_irq_enable(); \
- else \
- local_irq_disable();
-
-#ifdef APM_ZERO_SEGS
-# define APM_DECL_SEGS \
- unsigned int saved_fs; unsigned int saved_gs;
-# define APM_DO_SAVE_SEGS \
- savesegment(fs, saved_fs); savesegment(gs, saved_gs)
-# define APM_DO_RESTORE_SEGS \
- loadsegment(fs, saved_fs); loadsegment(gs, saved_gs)
-#else
-# define APM_DECL_SEGS
-# define APM_DO_SAVE_SEGS
-# define APM_DO_RESTORE_SEGS
-#endif
-
-/**
- * apm_bios_call - Make an APM BIOS 32bit call
- * @func: APM function to execute
- * @ebx_in: EBX register for call entry
- * @ecx_in: ECX register for call entry
- * @eax: EAX register return
- * @ebx: EBX register return
- * @ecx: ECX register return
- * @edx: EDX register return
- * @esi: ESI register return
- *
- * Make an APM call using the 32bit protected mode interface. The
- * caller is responsible for knowing if APM BIOS is configured and
- * enabled. This call can disable interrupts for a long period of
- * time on some laptops. The return value is in AH and the carry
- * flag is loaded into AL. If there is an error, then the error
- * code is returned in AH (bits 8-15 of eax) and this function
- * returns non-zero.
- */
-
-static u8 apm_bios_call(u32 func, u32 ebx_in, u32 ecx_in,
- u32 *eax, u32 *ebx, u32 *ecx, u32 *edx, u32 *esi)
-{
- APM_DECL_SEGS
- unsigned long flags;
- cpumask_t cpus;
- int cpu;
- struct desc_struct save_desc_40;
-
- cpus = apm_save_cpus();
-
- cpu = get_cpu();
- save_desc_40 = get_cpu_gdt_table(cpu)[0x40 / 8];
- get_cpu_gdt_table(cpu)[0x40 / 8] = bad_bios_desc;
-
- local_save_flags(flags);
- APM_DO_CLI;
- APM_DO_SAVE_SEGS;
- apm_bios_call_asm(func, ebx_in, ecx_in, eax, ebx, ecx, edx, esi);
- APM_DO_RESTORE_SEGS;
- local_irq_restore(flags);
- get_cpu_gdt_table(cpu)[0x40 / 8] = save_desc_40;
- put_cpu();
- apm_restore_cpus(cpus);
-
- return *eax & 0xff;
-}
-
-/**
- * apm_bios_call_simple - make a simple APM BIOS 32bit call
- * @func: APM function to invoke
- * @ebx_in: EBX register value for BIOS call
- * @ecx_in: ECX register value for BIOS call
- * @eax: EAX register on return from the BIOS call
- *
- * Make a BIOS call that does only returns one value, or just status.
- * If there is an error, then the error code is returned in AH
- * (bits 8-15 of eax) and this function returns non-zero. This is
- * used for simpler BIOS operations. This call may hold interrupts
- * off for a long time on some laptops.
- */
-
-static u8 apm_bios_call_simple(u32 func, u32 ebx_in, u32 ecx_in, u32 *eax)
-{
- u8 error;
- APM_DECL_SEGS
- unsigned long flags;
- cpumask_t cpus;
- int cpu;
- struct desc_struct save_desc_40;
-
-
- cpus = apm_save_cpus();
-
- cpu = get_cpu();
- save_desc_40 = get_cpu_gdt_table(cpu)[0x40 / 8];
- get_cpu_gdt_table(cpu)[0x40 / 8] = bad_bios_desc;
-
- local_save_flags(flags);
- APM_DO_CLI;
- APM_DO_SAVE_SEGS;
- error = apm_bios_call_simple_asm(func, ebx_in, ecx_in, eax);
- APM_DO_RESTORE_SEGS;
- local_irq_restore(flags);
- get_cpu_gdt_table(cpu)[0x40 / 8] = save_desc_40;
- put_cpu();
- apm_restore_cpus(cpus);
- return error;
-}
-
-/**
- * apm_driver_version - APM driver version
- * @val: loaded with the APM version on return
- *
- * Retrieve the APM version supported by the BIOS. This is only
- * supported for APM 1.1 or higher. An error indicates APM 1.0 is
- * probably present.
- *
- * On entry val should point to a value indicating the APM driver
- * version with the high byte being the major and the low byte the
- * minor number both in BCD
- *
- * On return it will hold the BIOS revision supported in the
- * same format.
- */
-
-static int apm_driver_version(u_short *val)
-{
- u32 eax;
-
- if (apm_bios_call_simple(APM_FUNC_VERSION, 0, *val, &eax))
- return (eax >> 8) & 0xff;
- *val = eax;
- return APM_SUCCESS;
-}
-
-/**
- * apm_get_event - get an APM event from the BIOS
- * @event: pointer to the event
- * @info: point to the event information
- *
- * The APM BIOS provides a polled information for event
- * reporting. The BIOS expects to be polled at least every second
- * when events are pending. When a message is found the caller should
- * poll until no more messages are present. However, this causes
- * problems on some laptops where a suspend event notification is
- * not cleared until it is acknowledged.
- *
- * Additional information is returned in the info pointer, providing
- * that APM 1.2 is in use. If no messges are pending the value 0x80
- * is returned (No power management events pending).
- */
-
-static int apm_get_event(apm_event_t *event, apm_eventinfo_t *info)
-{
- u32 eax;
- u32 ebx;
- u32 ecx;
- u32 dummy;
-
- if (apm_bios_call(APM_FUNC_GET_EVENT, 0, 0, &eax, &ebx, &ecx,
- &dummy, &dummy))
- return (eax >> 8) & 0xff;
- *event = ebx;
- if (apm_info.connection_version < 0x0102)
- *info = ~0; /* indicate info not valid */
- else
- *info = ecx;
- return APM_SUCCESS;
-}
-
-/**
- * set_power_state - set the power management state
- * @what: which items to transition
- * @state: state to transition to
- *
- * Request an APM change of state for one or more system devices. The
- * processor state must be transitioned last of all. what holds the
- * class of device in the upper byte and the device number (0xFF for
- * all) for the object to be transitioned.
- *
- * The state holds the state to transition to, which may in fact
- * be an acceptance of a BIOS requested state change.
- */
-
-static int set_power_state(u_short what, u_short state)
-{
- u32 eax;
-
- if (apm_bios_call_simple(APM_FUNC_SET_STATE, what, state, &eax))
- return (eax >> 8) & 0xff;
- return APM_SUCCESS;
-}
-
-/**
- * set_system_power_state - set system wide power state
- * @state: which state to enter
- *
- * Transition the entire system into a new APM power state.
- */
-
-static int set_system_power_state(u_short state)
-{
- return set_power_state(APM_DEVICE_ALL, state);
-}
-
-/**
- * apm_do_idle - perform power saving
- *
- * This function notifies the BIOS that the processor is (in the view
- * of the OS) idle. It returns -1 in the event that the BIOS refuses
- * to handle the idle request. On a success the function returns 1
- * if the BIOS did clock slowing or 0 otherwise.
- */
-
-static int apm_do_idle(void)
-{
- u32 eax;
-
- if (apm_bios_call_simple(APM_FUNC_IDLE, 0, 0, &eax)) {
- static unsigned long t;
-
- /* This always fails on some SMP boards running UP kernels.
- * Only report the failure the first 5 times.
- */
- if (++t < 5)
- {
- printk(KERN_DEBUG "apm_do_idle failed (%d)\n",
- (eax >> 8) & 0xff);
- t = jiffies;
- }
- return -1;
- }
- clock_slowed = (apm_info.bios.flags & APM_IDLE_SLOWS_CLOCK) != 0;
- return clock_slowed;
-}
-
-/**
- * apm_do_busy - inform the BIOS the CPU is busy
- *
- * Request that the BIOS brings the CPU back to full performance.
- */
-
-static void apm_do_busy(void)
-{
- u32 dummy;
-
- if (clock_slowed || ALWAYS_CALL_BUSY) {
- (void) apm_bios_call_simple(APM_FUNC_BUSY, 0, 0, &dummy);
- clock_slowed = 0;
- }
-}
-
-/*
- * If no process has really been interested in
- * the CPU for some time, we want to call BIOS
- * power management - we probably want
- * to conserve power.
- */
-#define IDLE_CALC_LIMIT (HZ * 100)
-#define IDLE_LEAKY_MAX 16
-
-static void (*original_pm_idle)(void);
-
-extern void default_idle(void);
-
-/**
- * apm_cpu_idle - cpu idling for APM capable Linux
- *
- * This is the idling function the kernel executes when APM is available. It
- * tries to do BIOS powermanagement based on the average system idle time.
- * Furthermore it calls the system default idle routine.
- */
-
-static void apm_cpu_idle(void)
-{
- static int use_apm_idle; /* = 0 */
- static unsigned int last_jiffies; /* = 0 */
- static unsigned int last_stime; /* = 0 */
-
- int apm_idle_done = 0;
- unsigned int jiffies_since_last_check = jiffies - last_jiffies;
- unsigned int bucket;
-
-recalc:
- if (jiffies_since_last_check > IDLE_CALC_LIMIT) {
- use_apm_idle = 0;
- last_jiffies = jiffies;
- last_stime = current->stime;
- } else if (jiffies_since_last_check > idle_period) {
- unsigned int idle_percentage;
-
- idle_percentage = current->stime - last_stime;
- idle_percentage *= 100;
- idle_percentage /= jiffies_since_last_check;
- use_apm_idle = (idle_percentage > idle_threshold);
- if (apm_info.forbid_idle)
- use_apm_idle = 0;
- last_jiffies = jiffies;
- last_stime = current->stime;
- }
-
- bucket = IDLE_LEAKY_MAX;
-
- while (!need_resched()) {
- if (use_apm_idle) {
- unsigned int t;
-
- t = jiffies;
- switch (apm_do_idle()) {
- case 0: apm_idle_done = 1;
- if (t != jiffies) {
- if (bucket) {
- bucket = IDLE_LEAKY_MAX;
- continue;
- }
- } else if (bucket) {
- bucket--;
- continue;
- }
- break;
- case 1: apm_idle_done = 1;
- break;
- default: /* BIOS refused */
- break;
- }
- }
- if (original_pm_idle)
- original_pm_idle();
- else
- default_idle();
- jiffies_since_last_check = jiffies - last_jiffies;
- if (jiffies_since_last_check > idle_period)
- goto recalc;
- }
-
- if (apm_idle_done)
- apm_do_busy();
-}
-
-/**
- * apm_power_off - ask the BIOS to power off
- *
- * Handle the power off sequence. This is the one piece of code we
- * will execute even on SMP machines. In order to deal with BIOS
- * bugs we support real mode APM BIOS power off calls. We also make
- * the SMP call on CPU0 as some systems will only honour this call
- * on their first cpu.
- */
-
-static void apm_power_off(void)
-{
- unsigned char po_bios_call[] = {
- 0xb8, 0x00, 0x10, /* movw $0x1000,ax */
- 0x8e, 0xd0, /* movw ax,ss */
- 0xbc, 0x00, 0xf0, /* movw $0xf000,sp */
- 0xb8, 0x07, 0x53, /* movw $0x5307,ax */
- 0xbb, 0x01, 0x00, /* movw $0x0001,bx */
- 0xb9, 0x03, 0x00, /* movw $0x0003,cx */
- 0xcd, 0x15 /* int $0x15 */
- };
-
- /* Some bioses don't like being called from CPU != 0 */
- if (apm_info.realmode_power_off)
- {
- (void)apm_save_cpus();
- machine_real_restart(po_bios_call, sizeof(po_bios_call));
- }
- else
- (void) set_system_power_state(APM_STATE_OFF);
-}
-
-#ifdef CONFIG_APM_DO_ENABLE
-
-/**
- * apm_enable_power_management - enable BIOS APM power management
- * @enable: enable yes/no
- *
- * Enable or disable the APM BIOS power services.
- */
-
-static int apm_enable_power_management(int enable)
-{
- u32 eax;
-
- if ((enable == 0) && (apm_info.bios.flags & APM_BIOS_DISENGAGED))
- return APM_NOT_ENGAGED;
- if (apm_bios_call_simple(APM_FUNC_ENABLE_PM, APM_DEVICE_BALL,
- enable, &eax))
- return (eax >> 8) & 0xff;
- if (enable)
- apm_info.bios.flags &= ~APM_BIOS_DISABLED;
- else
- apm_info.bios.flags |= APM_BIOS_DISABLED;
- return APM_SUCCESS;
-}
-#endif
-
-/**
- * apm_get_power_status - get current power state
- * @status: returned status
- * @bat: battery info
- * @life: estimated life
- *
- * Obtain the current power status from the APM BIOS. We return a
- * status which gives the rough battery status, and current power
- * source. The bat value returned give an estimate as a percentage
- * of life and a status value for the battery. The estimated life
- * if reported is a lifetime in secodnds/minutes at current powwer
- * consumption.
- */
-
-static int apm_get_power_status(u_short *status, u_short *bat, u_short *life)
-{
- u32 eax;
- u32 ebx;
- u32 ecx;
- u32 edx;
- u32 dummy;
-
- if (apm_info.get_power_status_broken)
- return APM_32_UNSUPPORTED;
- if (apm_bios_call(APM_FUNC_GET_STATUS, APM_DEVICE_ALL, 0,
- &eax, &ebx, &ecx, &edx, &dummy))
- return (eax >> 8) & 0xff;
- *status = ebx;
- *bat = ecx;
- if (apm_info.get_power_status_swabinminutes) {
- *life = swab16((u16)edx);
- *life |= 0x8000;
- } else
- *life = edx;
- return APM_SUCCESS;
-}
-
-#if 0
-static int apm_get_battery_status(u_short which, u_short *status,
- u_short *bat, u_short *life, u_short *nbat)
-{
- u32 eax;
- u32 ebx;
- u32 ecx;
- u32 edx;
- u32 esi;
-
- if (apm_info.connection_version < 0x0102) {
- /* pretend we only have one battery. */
- if (which != 1)
- return APM_BAD_DEVICE;
- *nbat = 1;
- return apm_get_power_status(status, bat, life);
- }
-
- if (apm_bios_call(APM_FUNC_GET_STATUS, (0x8000 | (which)), 0, &eax,
- &ebx, &ecx, &edx, &esi))
- return (eax >> 8) & 0xff;
- *status = ebx;
- *bat = ecx;
- *life = edx;
- *nbat = esi;
- return APM_SUCCESS;
-}
-#endif
-
-/**
- * apm_engage_power_management - enable PM on a device
- * @device: identity of device
- * @enable: on/off
- *
- * Activate or deactive power management on either a specific device
- * or the entire system (%APM_DEVICE_ALL).
- */
-
-static int apm_engage_power_management(u_short device, int enable)
-{
- u32 eax;
-
- if ((enable == 0) && (device == APM_DEVICE_ALL)
- && (apm_info.bios.flags & APM_BIOS_DISABLED))
- return APM_DISABLED;
- if (apm_bios_call_simple(APM_FUNC_ENGAGE_PM, device, enable, &eax))
- return (eax >> 8) & 0xff;
- if (device == APM_DEVICE_ALL) {
- if (enable)
- apm_info.bios.flags &= ~APM_BIOS_DISENGAGED;
- else
- apm_info.bios.flags |= APM_BIOS_DISENGAGED;
- }
- return APM_SUCCESS;
-}
-
-#if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT)
-
-/**
- * apm_console_blank - blank the display
- * @blank: on/off
- *
- * Attempt to blank the console, firstly by blanking just video device
- * zero, and if that fails (some BIOSes don't support it) then it blanks
- * all video devices. Typically the BIOS will do laptop backlight and
- * monitor powerdown for us.
- */
-
-static int apm_console_blank(int blank)
-{
- int error;
- u_short state;
-
- state = blank ? APM_STATE_STANDBY : APM_STATE_READY;
- /* Blank the first display device */
- error = set_power_state(0x100, state);
- if ((error != APM_SUCCESS) && (error != APM_NO_ERROR)) {
- /* try to blank them all instead */
- error = set_power_state(0x1ff, state);
- if ((error != APM_SUCCESS) && (error != APM_NO_ERROR))
- /* try to blank device one instead */
- error = set_power_state(0x101, state);
- }
- if ((error == APM_SUCCESS) || (error == APM_NO_ERROR))
- return 1;
- if (error == APM_NOT_ENGAGED) {
- static int tried;
- int eng_error;
- if (tried++ == 0) {
- eng_error = apm_engage_power_management(APM_DEVICE_ALL, 1);
- if (eng_error) {
- apm_error("set display", error);
- apm_error("engage interface", eng_error);
- return 0;
- } else
- return apm_console_blank(blank);
- }
- }
- apm_error("set display", error);
- return 0;
-}
-#endif
-
-static int queue_empty(struct apm_user *as)
-{
- return as->event_head == as->event_tail;
-}
-
-static apm_event_t get_queued_event(struct apm_user *as)
-{
- as->event_tail = (as->event_tail + 1) % APM_MAX_EVENTS;
- return as->events[as->event_tail];
-}
-
-static void queue_event(apm_event_t event, struct apm_user *sender)
-{
- struct apm_user * as;
-
- spin_lock(&user_list_lock);
- if (user_list == NULL)
- goto out;
- for (as = user_list; as != NULL; as = as->next) {
- if ((as == sender) || (!as->reader))
- continue;
- as->event_head = (as->event_head + 1) % APM_MAX_EVENTS;
- if (as->event_head == as->event_tail) {
- static int notified;
-
- if (notified++ == 0)
- printk(KERN_ERR "apm: an event queue overflowed\n");
- as->event_tail = (as->event_tail + 1) % APM_MAX_EVENTS;
- }
- as->events[as->event_head] = event;
- if ((!as->suser) || (!as->writer))
- continue;
- switch (event) {
- case APM_SYS_SUSPEND:
- case APM_USER_SUSPEND:
- as->suspends_pending++;
- suspends_pending++;
- break;
-
- case APM_SYS_STANDBY:
- case APM_USER_STANDBY:
- as->standbys_pending++;
- standbys_pending++;
- break;
- }
- }
- wake_up_interruptible(&apm_waitqueue);
-out:
- spin_unlock(&user_list_lock);
-}
-
-static void set_time(void)
-{
- if (got_clock_diff) { /* Must know time zone in order to set clock */
- xtime.tv_sec = get_cmos_time() + clock_cmos_diff;
- xtime.tv_nsec = 0;
- }
-}
-
-static void get_time_diff(void)
-{
-#ifndef CONFIG_APM_RTC_IS_GMT
- /*
- * Estimate time zone so that set_time can update the clock
- */
- clock_cmos_diff = -get_cmos_time();
- clock_cmos_diff += get_seconds();
- got_clock_diff = 1;
-#endif
-}
-
-static void reinit_timer(void)
-{
-#ifdef INIT_TIMER_AFTER_SUSPEND
- unsigned long flags;
-
- spin_lock_irqsave(&i8253_lock, flags);
- /* set the clock to 100 Hz */
- outb_p(0x34, PIT_MODE); /* binary, mode 2, LSB/MSB, ch 0 */
- udelay(10);
- outb_p(LATCH & 0xff, PIT_CH0); /* LSB */
- udelay(10);
- outb(LATCH >> 8, PIT_CH0); /* MSB */
- udelay(10);
- spin_unlock_irqrestore(&i8253_lock, flags);
-#endif
-}
-
-static int suspend(int vetoable)
-{
- int err;
- struct apm_user *as;
-
- if (pm_send_all(PM_SUSPEND, (void *)3)) {
- /* Vetoed */
- if (vetoable) {
- if (apm_info.connection_version > 0x100)
- set_system_power_state(APM_STATE_REJECT);
- err = -EBUSY;
- ignore_sys_suspend = 0;
- printk(KERN_WARNING "apm: suspend was vetoed.\n");
- goto out;
- }
- printk(KERN_CRIT "apm: suspend was vetoed, but suspending anyway.\n");
- }
-
- device_suspend(PMSG_SUSPEND);
- local_irq_disable();
- device_power_down(PMSG_SUSPEND);
-
- /* serialize with the timer interrupt */
- write_seqlock(&xtime_lock);
-
- /* protect against access to timer chip registers */
- spin_lock(&i8253_lock);
-
- get_time_diff();
- /*
- * Irq spinlock must be dropped around set_system_power_state.
- * We'll undo any timer changes due to interrupts below.
- */
- spin_unlock(&i8253_lock);
- write_sequnlock(&xtime_lock);
- local_irq_enable();
-
- save_processor_state();
- err = set_system_power_state(APM_STATE_SUSPEND);
- ignore_normal_resume = 1;
- restore_processor_state();
-
- local_irq_disable();
- write_seqlock(&xtime_lock);
- spin_lock(&i8253_lock);
- reinit_timer();
- set_time();
-
- spin_unlock(&i8253_lock);
- write_sequnlock(&xtime_lock);
-
- if (err == APM_NO_ERROR)
- err = APM_SUCCESS;
- if (err != APM_SUCCESS)
- apm_error("suspend", err);
- err = (err == APM_SUCCESS) ? 0 : -EIO;
- device_power_up();
- local_irq_enable();
- device_resume();
- pm_send_all(PM_RESUME, (void *)0);
- queue_event(APM_NORMAL_RESUME, NULL);
- out:
- spin_lock(&user_list_lock);
- for (as = user_list; as != NULL; as = as->next) {
- as->suspend_wait = 0;
- as->suspend_result = err;
- }
- spin_unlock(&user_list_lock);
- wake_up_interruptible(&apm_suspend_waitqueue);
- return err;
-}
-
-static void standby(void)
-{
- int err;
-
- local_irq_disable();
- device_power_down(PMSG_SUSPEND);
- /* serialize with the timer interrupt */
- write_seqlock(&xtime_lock);
- /* If needed, notify drivers here */
- get_time_diff();
- write_sequnlock(&xtime_lock);
- local_irq_enable();
-
- err = set_system_power_state(APM_STATE_STANDBY);
- if ((err != APM_SUCCESS) && (err != APM_NO_ERROR))
- apm_error("standby", err);
-
- local_irq_disable();
- device_power_up();
- local_irq_enable();
-}
-
-static apm_event_t get_event(void)
-{
- int error;
- apm_event_t event;
- apm_eventinfo_t info;
-
- static int notified;
-
- /* we don't use the eventinfo */
- error = apm_get_event(&event, &info);
- if (error == APM_SUCCESS)
- return event;
-
- if ((error != APM_NO_EVENTS) && (notified++ == 0))
- apm_error("get_event", error);
-
- return 0;
-}
-
-static void check_events(void)
-{
- apm_event_t event;
- static unsigned long last_resume;
- static int ignore_bounce;
-
- while ((event = get_event()) != 0) {
- if (debug) {
- if (event <= NR_APM_EVENT_NAME)
- printk(KERN_DEBUG "apm: received %s notify\n",
- apm_event_name[event - 1]);
- else
- printk(KERN_DEBUG "apm: received unknown "
- "event 0x%02x\n", event);
- }
- if (ignore_bounce
- && ((jiffies - last_resume) > bounce_interval))
- ignore_bounce = 0;
-
- switch (event) {
- case APM_SYS_STANDBY:
- case APM_USER_STANDBY:
- queue_event(event, NULL);
- if (standbys_pending <= 0)
- standby();
- break;
-
- case APM_USER_SUSPEND:
-#ifdef CONFIG_APM_IGNORE_USER_SUSPEND
- if (apm_info.connection_version > 0x100)
- set_system_power_state(APM_STATE_REJECT);
- break;
-#endif
- case APM_SYS_SUSPEND:
- if (ignore_bounce) {
- if (apm_info.connection_version > 0x100)
- set_system_power_state(APM_STATE_REJECT);
- break;
- }
- /*
- * If we are already processing a SUSPEND,
- * then further SUSPEND events from the BIOS
- * will be ignored. We also return here to
- * cope with the fact that the Thinkpads keep
- * sending a SUSPEND event until something else
- * happens!
- */
- if (ignore_sys_suspend)
- return;
- ignore_sys_suspend = 1;
- queue_event(event, NULL);
- if (suspends_pending <= 0)
- (void) suspend(1);
- break;
-
- case APM_NORMAL_RESUME:
- case APM_CRITICAL_RESUME:
- case APM_STANDBY_RESUME:
- ignore_sys_suspend = 0;
- last_resume = jiffies;
- ignore_bounce = 1;
- if ((event != APM_NORMAL_RESUME)
- || (ignore_normal_resume == 0)) {
- write_seqlock_irq(&xtime_lock);
- set_time();
- write_sequnlock_irq(&xtime_lock);
- device_resume();
- pm_send_all(PM_RESUME, (void *)0);
- queue_event(event, NULL);
- }
- ignore_normal_resume = 0;
- break;
-
- case APM_CAPABILITY_CHANGE:
- case APM_LOW_BATTERY:
- case APM_POWER_STATUS_CHANGE:
- queue_event(event, NULL);
- /* If needed, notify drivers here */
- break;
-
- case APM_UPDATE_TIME:
- write_seqlock_irq(&xtime_lock);
- set_time();
- write_sequnlock_irq(&xtime_lock);
- break;
-
- case APM_CRITICAL_SUSPEND:
- /*
- * We are not allowed to reject a critical suspend.
- */
- (void) suspend(0);
- break;
- }
- }
-}
-
-static void apm_event_handler(void)
-{
- static int pending_count = 4;
- int err;
-
- if ((standbys_pending > 0) || (suspends_pending > 0)) {
- if ((apm_info.connection_version > 0x100) &&
- (pending_count-- <= 0)) {
- pending_count = 4;
- if (debug)
- printk(KERN_DEBUG "apm: setting state busy\n");
- err = set_system_power_state(APM_STATE_BUSY);
- if (err)
- apm_error("busy", err);
- }
- } else
- pending_count = 4;
- check_events();
-}
-
-/*
- * This is the APM thread main loop.
- */
-
-static void apm_mainloop(void)
-{
- DECLARE_WAITQUEUE(wait, current);
-
- add_wait_queue(&apm_waitqueue, &wait);
- set_current_state(TASK_INTERRUPTIBLE);
- for (;;) {
- schedule_timeout(APM_CHECK_TIMEOUT);
- if (exit_kapmd)
- break;
- /*
- * Ok, check all events, check for idle (and mark us sleeping
- * so as not to count towards the load average)..
- */
- set_current_state(TASK_INTERRUPTIBLE);
- apm_event_handler();
- }
- remove_wait_queue(&apm_waitqueue, &wait);
-}
-
-static int check_apm_user(struct apm_user *as, const char *func)
-{
- if ((as == NULL) || (as->magic != APM_BIOS_MAGIC)) {
- printk(KERN_ERR "apm: %s passed bad filp\n", func);
- return 1;
- }
- return 0;
-}
-
-static ssize_t do_read(struct file *fp, char __user *buf, size_t count, loff_t *ppos)
-{
- struct apm_user * as;
- int i;
- apm_event_t event;
-
- as = fp->private_data;
- if (check_apm_user(as, "read"))
- return -EIO;
- if ((int)count < sizeof(apm_event_t))
- return -EINVAL;
- if ((queue_empty(as)) && (fp->f_flags & O_NONBLOCK))
- return -EAGAIN;
- wait_event_interruptible(apm_waitqueue, !queue_empty(as));
- i = count;
- while ((i >= sizeof(event)) && !queue_empty(as)) {
- event = get_queued_event(as);
- if (copy_to_user(buf, &event, sizeof(event))) {
- if (i < count)
- break;
- return -EFAULT;
- }
- switch (event) {
- case APM_SYS_SUSPEND:
- case APM_USER_SUSPEND:
- as->suspends_read++;
- break;
-
- case APM_SYS_STANDBY:
- case APM_USER_STANDBY:
- as->standbys_read++;
- break;
- }
- buf += sizeof(event);
- i -= sizeof(event);
- }
- if (i < count)
- return count - i;
- if (signal_pending(current))
- return -ERESTARTSYS;
- return 0;
-}
-
-static unsigned int do_poll(struct file *fp, poll_table * wait)
-{
- struct apm_user * as;
-
- as = fp->private_data;
- if (check_apm_user(as, "poll"))
- return 0;
- poll_wait(fp, &apm_waitqueue, wait);
- if (!queue_empty(as))
- return POLLIN | POLLRDNORM;
- return 0;
-}
-
-static int do_ioctl(struct inode * inode, struct file *filp,
- u_int cmd, u_long arg)
-{
- struct apm_user * as;
-
- as = filp->private_data;
- if (check_apm_user(as, "ioctl"))
- return -EIO;
- if ((!as->suser) || (!as->writer))
- return -EPERM;
- switch (cmd) {
- case APM_IOC_STANDBY:
- if (as->standbys_read > 0) {
- as->standbys_read--;
- as->standbys_pending--;
- standbys_pending--;
- } else
- queue_event(APM_USER_STANDBY, as);
- if (standbys_pending <= 0)
- standby();
- break;
- case APM_IOC_SUSPEND:
- if (as->suspends_read > 0) {
- as->suspends_read--;
- as->suspends_pending--;
- suspends_pending--;
- } else
- queue_event(APM_USER_SUSPEND, as);
- if (suspends_pending <= 0) {
- return suspend(1);
- } else {
- as->suspend_wait = 1;
- wait_event_interruptible(apm_suspend_waitqueue,
- as->suspend_wait == 0);
- return as->suspend_result;
- }
- break;
- default:
- return -EINVAL;
- }
- return 0;
-}
-
-static int do_release(struct inode * inode, struct file * filp)
-{
- struct apm_user * as;
-
- as = filp->private_data;
- if (check_apm_user(as, "release"))
- return 0;
- filp->private_data = NULL;
- if (as->standbys_pending > 0) {
- standbys_pending -= as->standbys_pending;
- if (standbys_pending <= 0)
- standby();
- }
- if (as->suspends_pending > 0) {
- suspends_pending -= as->suspends_pending;
- if (suspends_pending <= 0)
- (void) suspend(1);
- }
- spin_lock(&user_list_lock);
- if (user_list == as)
- user_list = as->next;
- else {
- struct apm_user * as1;
-
- for (as1 = user_list;
- (as1 != NULL) && (as1->next != as);
- as1 = as1->next)
- ;
- if (as1 == NULL)
- printk(KERN_ERR "apm: filp not in user list\n");
- else
- as1->next = as->next;
- }
- spin_unlock(&user_list_lock);
- kfree(as);
- return 0;
-}
-
-static int do_open(struct inode * inode, struct file * filp)
-{
- struct apm_user * as;
-
- as = (struct apm_user *)kmalloc(sizeof(*as), GFP_KERNEL);
- if (as == NULL) {
- printk(KERN_ERR "apm: cannot allocate struct of size %d bytes\n",
- sizeof(*as));
- return -ENOMEM;
- }
- as->magic = APM_BIOS_MAGIC;
- as->event_tail = as->event_head = 0;
- as->suspends_pending = as->standbys_pending = 0;
- as->suspends_read = as->standbys_read = 0;
- /*
- * XXX - this is a tiny bit broken, when we consider BSD
- * process accounting. If the device is opened by root, we
- * instantly flag that we used superuser privs. Who knows,
- * we might close the device immediately without doing a
- * privileged operation -- cevans
- */
- as->suser = capable(CAP_SYS_ADMIN);
- as->writer = (filp->f_mode & FMODE_WRITE) == FMODE_WRITE;
- as->reader = (filp->f_mode & FMODE_READ) == FMODE_READ;
- spin_lock(&user_list_lock);
- as->next = user_list;
- user_list = as;
- spin_unlock(&user_list_lock);
- filp->private_data = as;
- return 0;
-}
-
-static int apm_get_info(char *buf, char **start, off_t fpos, int length)
-{
- char * p;
- unsigned short bx;
- unsigned short cx;
- unsigned short dx;
- int error;
- unsigned short ac_line_status = 0xff;
- unsigned short battery_status = 0xff;
- unsigned short battery_flag = 0xff;
- int percentage = -1;
- int time_units = -1;
- char *units = "?";
-
- p = buf;
-
- if ((num_online_cpus() == 1) &&
- !(error = apm_get_power_status(&bx, &cx, &dx))) {
- ac_line_status = (bx >> 8) & 0xff;
- battery_status = bx & 0xff;
- if ((cx & 0xff) != 0xff)
- percentage = cx & 0xff;
-
- if (apm_info.connection_version > 0x100) {
- battery_flag = (cx >> 8) & 0xff;
- if (dx != 0xffff) {
- units = (dx & 0x8000) ? "min" : "sec";
- time_units = dx & 0x7fff;
- }
- }
- }
- /* Arguments, with symbols from linux/apm_bios.h. Information is
- from the Get Power Status (0x0a) call unless otherwise noted.
-
- 0) Linux driver version (this will change if format changes)
- 1) APM BIOS Version. Usually 1.0, 1.1 or 1.2.
- 2) APM flags from APM Installation Check (0x00):
- bit 0: APM_16_BIT_SUPPORT
- bit 1: APM_32_BIT_SUPPORT
- bit 2: APM_IDLE_SLOWS_CLOCK
- bit 3: APM_BIOS_DISABLED
- bit 4: APM_BIOS_DISENGAGED
- 3) AC line status
- 0x00: Off-line
- 0x01: On-line
- 0x02: On backup power (BIOS >= 1.1 only)
- 0xff: Unknown
- 4) Battery status
- 0x00: High
- 0x01: Low
- 0x02: Critical
- 0x03: Charging
- 0x04: Selected battery not present (BIOS >= 1.2 only)
- 0xff: Unknown
- 5) Battery flag
- bit 0: High
- bit 1: Low
- bit 2: Critical
- bit 3: Charging
- bit 7: No system battery
- 0xff: Unknown
- 6) Remaining battery life (percentage of charge):
- 0-100: valid
- -1: Unknown
- 7) Remaining battery life (time units):
- Number of remaining minutes or seconds
- -1: Unknown
- 8) min = minutes; sec = seconds */
-
- p += sprintf(p, "%s %d.%d 0x%02x 0x%02x 0x%02x 0x%02x %d%% %d %s\n",
- driver_version,
- (apm_info.bios.version >> 8) & 0xff,
- apm_info.bios.version & 0xff,
- apm_info.bios.flags,
- ac_line_status,
- battery_status,
- battery_flag,
- percentage,
- time_units,
- units);
-
- return p - buf;
-}
-
-static int apm(void *unused)
-{
- unsigned short bx;
- unsigned short cx;
- unsigned short dx;
- int error;
- char * power_stat;
- char * bat_stat;
-
- kapmd_running = 1;
-
- daemonize("kapmd");
-
- current->flags |= PF_NOFREEZE;
-
-#ifdef CONFIG_SMP
- /* 2002/08/01 - WT
- * This is to avoid random crashes at boot time during initialization
- * on SMP systems in case of "apm=power-off" mode. Seen on ASUS A7M266D.
- * Some bioses don't like being called from CPU != 0.
- * Method suggested by Ingo Molnar.
- */
- set_cpus_allowed(current, cpumask_of_cpu(0));
- BUG_ON(smp_processor_id() != 0);
-#endif
-
- if (apm_info.connection_version == 0) {
- apm_info.connection_version = apm_info.bios.version;
- if (apm_info.connection_version > 0x100) {
- /*
- * We only support BIOSs up to version 1.2
- */
- if (apm_info.connection_version > 0x0102)
- apm_info.connection_version = 0x0102;
- error = apm_driver_version(&apm_info.connection_version);
- if (error != APM_SUCCESS) {
- apm_error("driver version", error);
- /* Fall back to an APM 1.0 connection. */
- apm_info.connection_version = 0x100;
- }
- }
- }
-
- if (debug)
- printk(KERN_INFO "apm: Connection version %d.%d\n",
- (apm_info.connection_version >> 8) & 0xff,
- apm_info.connection_version & 0xff);
-
-#ifdef CONFIG_APM_DO_ENABLE
- if (apm_info.bios.flags & APM_BIOS_DISABLED) {
- /*
- * This call causes my NEC UltraLite Versa 33/C to hang if it
- * is booted with PM disabled but not in the docking station.
- * Unfortunate ...
- */
- error = apm_enable_power_management(1);
- if (error) {
- apm_error("enable power management", error);
- return -1;
- }
- }
-#endif
-
- if ((apm_info.bios.flags & APM_BIOS_DISENGAGED)
- && (apm_info.connection_version > 0x0100)) {
- error = apm_engage_power_management(APM_DEVICE_ALL, 1);
- if (error) {
- apm_error("engage power management", error);
- return -1;
- }
- }
-
- if (debug && (num_online_cpus() == 1 || smp )) {
- error = apm_get_power_status(&bx, &cx, &dx);
- if (error)
- printk(KERN_INFO "apm: power status not available\n");
- else {
- switch ((bx >> 8) & 0xff) {
- case 0: power_stat = "off line"; break;
- case 1: power_stat = "on line"; break;
- case 2: power_stat = "on backup power"; break;
- default: power_stat = "unknown"; break;
- }
- switch (bx & 0xff) {
- case 0: bat_stat = "high"; break;
- case 1: bat_stat = "low"; break;
- case 2: bat_stat = "critical"; break;
- case 3: bat_stat = "charging"; break;
- default: bat_stat = "unknown"; break;
- }
- printk(KERN_INFO
- "apm: AC %s, battery status %s, battery life ",
- power_stat, bat_stat);
- if ((cx & 0xff) == 0xff)
- printk("unknown\n");
- else
- printk("%d%%\n", cx & 0xff);
- if (apm_info.connection_version > 0x100) {
- printk(KERN_INFO
- "apm: battery flag 0x%02x, battery life ",
- (cx >> 8) & 0xff);
- if (dx == 0xffff)
- printk("unknown\n");
- else
- printk("%d %s\n", dx & 0x7fff,
- (dx & 0x8000) ?
- "minutes" : "seconds");
- }
- }
- }
-
- /* Install our power off handler.. */
- if (power_off)
- pm_power_off = apm_power_off;
-
- if (num_online_cpus() == 1 || smp) {
-#if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT)
- console_blank_hook = apm_console_blank;
-#endif
- apm_mainloop();
-#if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT)
- console_blank_hook = NULL;
-#endif
- }
- kapmd_running = 0;
-
- return 0;
-}
-
-#ifndef MODULE
-static int __init apm_setup(char *str)
-{
- int invert;
-
- while ((str != NULL) && (*str != '\0')) {
- if (strncmp(str, "off", 3) == 0)
- apm_disabled = 1;
- if (strncmp(str, "on", 2) == 0)
- apm_disabled = 0;
- if ((strncmp(str, "bounce-interval=", 16) == 0) ||
- (strncmp(str, "bounce_interval=", 16) == 0))
- bounce_interval = simple_strtol(str + 16, NULL, 0);
- if ((strncmp(str, "idle-threshold=", 15) == 0) ||
- (strncmp(str, "idle_threshold=", 15) == 0))
- idle_threshold = simple_strtol(str + 15, NULL, 0);
- if ((strncmp(str, "idle-period=", 12) == 0) ||
- (strncmp(str, "idle_period=", 12) == 0))
- idle_period = simple_strtol(str + 12, NULL, 0);
- invert = (strncmp(str, "no-", 3) == 0) ||
- (strncmp(str, "no_", 3) == 0);
- if (invert)
- str += 3;
- if (strncmp(str, "debug", 5) == 0)
- debug = !invert;
- if ((strncmp(str, "power-off", 9) == 0) ||
- (strncmp(str, "power_off", 9) == 0))
- power_off = !invert;
- if (strncmp(str, "smp", 3) == 0)
- {
- smp = !invert;
- idle_threshold = 100;
- }
- if ((strncmp(str, "allow-ints", 10) == 0) ||
- (strncmp(str, "allow_ints", 10) == 0))
- apm_info.allow_ints = !invert;
- if ((strncmp(str, "broken-psr", 10) == 0) ||
- (strncmp(str, "broken_psr", 10) == 0))
- apm_info.get_power_status_broken = !invert;
- if ((strncmp(str, "realmode-power-off", 18) == 0) ||
- (strncmp(str, "realmode_power_off", 18) == 0))
- apm_info.realmode_power_off = !invert;
- str = strchr(str, ',');
- if (str != NULL)
- str += strspn(str, ", \t");
- }
- return 1;
-}
-
-__setup("apm=", apm_setup);
-#endif
-
-static struct file_operations apm_bios_fops = {
- .owner = THIS_MODULE,
- .read = do_read,
- .poll = do_poll,
- .ioctl = do_ioctl,
- .open = do_open,
- .release = do_release,
-};
-
-static struct miscdevice apm_device = {
- APM_MINOR_DEV,
- "apm_bios",
- &apm_bios_fops
-};
-
-
-/* Simple "print if true" callback */
-static int __init print_if_true(struct dmi_system_id *d)
-{
- printk("%s\n", d->ident);
- return 0;
-}
-
-/*
- * Some Bioses enable the PS/2 mouse (touchpad) at resume, even if it was
- * disabled before the suspend. Linux used to get terribly confused by that.
- */
-static int __init broken_ps2_resume(struct dmi_system_id *d)
-{
- printk(KERN_INFO "%s machine detected. Mousepad Resume Bug workaround hopefully not needed.\n", d->ident);
- return 0;
-}
-
-/* Some bioses have a broken protected mode poweroff and need to use realmode */
-static int __init set_realmode_power_off(struct dmi_system_id *d)
-{
- if (apm_info.realmode_power_off == 0) {
- apm_info.realmode_power_off = 1;
- printk(KERN_INFO "%s bios detected. Using realmode poweroff only.\n", d->ident);
- }
- return 0;
-}
-
-/* Some laptops require interrupts to be enabled during APM calls */
-static int __init set_apm_ints(struct dmi_system_id *d)
-{
- if (apm_info.allow_ints == 0) {
- apm_info.allow_ints = 1;
- printk(KERN_INFO "%s machine detected. Enabling interrupts during APM calls.\n", d->ident);
- }
- return 0;
-}
-
-/* Some APM bioses corrupt memory or just plain do not work */
-static int __init apm_is_horked(struct dmi_system_id *d)
-{
- if (apm_info.disabled == 0) {
- apm_info.disabled = 1;
- printk(KERN_INFO "%s machine detected. Disabling APM.\n", d->ident);
- }
- return 0;
-}
-
-static int __init apm_is_horked_d850md(struct dmi_system_id *d)
-{
- if (apm_info.disabled == 0) {
- apm_info.disabled = 1;
- printk(KERN_INFO "%s machine detected. Disabling APM.\n", d->ident);
- printk(KERN_INFO "This bug is fixed in bios P15 which is available for \n");
- printk(KERN_INFO "download from support.intel.com \n");
- }
- return 0;
-}
-
-/* Some APM bioses hang on APM idle calls */
-static int __init apm_likes_to_melt(struct dmi_system_id *d)
-{
- if (apm_info.forbid_idle == 0) {
- apm_info.forbid_idle = 1;
- printk(KERN_INFO "%s machine detected. Disabling APM idle calls.\n", d->ident);
- }
- return 0;
-}
-
-/*
- * Check for clue free BIOS implementations who use
- * the following QA technique
- *
- * [ Write BIOS Code ]<------
- * | ^
- * < Does it Compile >----N--
- * |Y ^
- * < Does it Boot Win98 >-N--
- * |Y
- * [Ship It]
- *
- * Phoenix A04 08/24/2000 is known bad (Dell Inspiron 5000e)
- * Phoenix A07 09/29/2000 is known good (Dell Inspiron 5000)
- */
-static int __init broken_apm_power(struct dmi_system_id *d)
-{
- apm_info.get_power_status_broken = 1;
- printk(KERN_WARNING "BIOS strings suggest APM bugs, disabling power status reporting.\n");
- return 0;
-}
-
-/*
- * This bios swaps the APM minute reporting bytes over (Many sony laptops
- * have this problem).
- */
-static int __init swab_apm_power_in_minutes(struct dmi_system_id *d)
-{
- apm_info.get_power_status_swabinminutes = 1;
- printk(KERN_WARNING "BIOS strings suggest APM reports battery life in minutes and wrong byte order.\n");
- return 0;
-}
-
-static struct dmi_system_id __initdata apm_dmi_table[] = {
- {
- print_if_true,
- KERN_WARNING "IBM T23 - BIOS 1.03b+ and controller firmware 1.02+ may be needed for Linux APM.",
- { DMI_MATCH(DMI_SYS_VENDOR, "IBM"),
- DMI_MATCH(DMI_BIOS_VERSION, "1AET38WW (1.01b)"), },
- },
- { /* Handle problems with APM on the C600 */
- broken_ps2_resume, "Dell Latitude C600",
- { DMI_MATCH(DMI_SYS_VENDOR, "Dell"),
- DMI_MATCH(DMI_PRODUCT_NAME, "Latitude C600"), },
- },
- { /* Allow interrupts during suspend on Dell Latitude laptops*/
- set_apm_ints, "Dell Latitude",
- { DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
- DMI_MATCH(DMI_PRODUCT_NAME, "Latitude C510"), }
- },
- { /* APM crashes */
- apm_is_horked, "Dell Inspiron 2500",
- { DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
- DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 2500"),
- DMI_MATCH(DMI_BIOS_VENDOR,"Phoenix Technologies LTD"),
- DMI_MATCH(DMI_BIOS_VERSION,"A11"), },
- },
- { /* Allow interrupts during suspend on Dell Inspiron laptops*/
- set_apm_ints, "Dell Inspiron", {
- DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
- DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 4000"), },
- },
- { /* Handle problems with APM on Inspiron 5000e */
- broken_apm_power, "Dell Inspiron 5000e",
- { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
- DMI_MATCH(DMI_BIOS_VERSION, "A04"),
- DMI_MATCH(DMI_BIOS_DATE, "08/24/2000"), },
- },
- { /* Handle problems with APM on Inspiron 2500 */
- broken_apm_power, "Dell Inspiron 2500",
- { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
- DMI_MATCH(DMI_BIOS_VERSION, "A12"),
- DMI_MATCH(DMI_BIOS_DATE, "02/04/2002"), },
- },
- { /* APM crashes */
- apm_is_horked, "Dell Dimension 4100",
- { DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
- DMI_MATCH(DMI_PRODUCT_NAME, "XPS-Z"),
- DMI_MATCH(DMI_BIOS_VENDOR,"Intel Corp."),
- DMI_MATCH(DMI_BIOS_VERSION,"A11"), },
- },
- { /* Allow interrupts during suspend on Compaq Laptops*/
- set_apm_ints, "Compaq 12XL125",
- { DMI_MATCH(DMI_SYS_VENDOR, "Compaq"),
- DMI_MATCH(DMI_PRODUCT_NAME, "Compaq PC"),
- DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
- DMI_MATCH(DMI_BIOS_VERSION,"4.06"), },
- },
- { /* Allow interrupts during APM or the clock goes slow */
- set_apm_ints, "ASUSTeK",
- { DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK Computer Inc."),
- DMI_MATCH(DMI_PRODUCT_NAME, "L8400K series Notebook PC"), },
- },
- { /* APM blows on shutdown */
- apm_is_horked, "ABIT KX7-333[R]",
- { DMI_MATCH(DMI_BOARD_VENDOR, "ABIT"),
- DMI_MATCH(DMI_BOARD_NAME, "VT8367-8233A (KX7-333[R])"), },
- },
- { /* APM crashes */
- apm_is_horked, "Trigem Delhi3",
- { DMI_MATCH(DMI_SYS_VENDOR, "TriGem Computer, Inc"),
- DMI_MATCH(DMI_PRODUCT_NAME, "Delhi3"), },
- },
- { /* APM crashes */
- apm_is_horked, "Fujitsu-Siemens",
- { DMI_MATCH(DMI_BIOS_VENDOR, "hoenix/FUJITSU SIEMENS"),
- DMI_MATCH(DMI_BIOS_VERSION, "Version1.01"), },
- },
- { /* APM crashes */
- apm_is_horked_d850md, "Intel D850MD",
- { DMI_MATCH(DMI_BIOS_VENDOR, "Intel Corp."),
- DMI_MATCH(DMI_BIOS_VERSION, "MV85010A.86A.0016.P07.0201251536"), },
- },
- { /* APM crashes */
- apm_is_horked, "Intel D810EMO",
- { DMI_MATCH(DMI_BIOS_VENDOR, "Intel Corp."),
- DMI_MATCH(DMI_BIOS_VERSION, "MO81010A.86A.0008.P04.0004170800"), },
- },
- { /* APM crashes */
- apm_is_horked, "Dell XPS-Z",
- { DMI_MATCH(DMI_BIOS_VENDOR, "Intel Corp."),
- DMI_MATCH(DMI_BIOS_VERSION, "A11"),
- DMI_MATCH(DMI_PRODUCT_NAME, "XPS-Z"), },
- },
- { /* APM crashes */
- apm_is_horked, "Sharp PC-PJ/AX",
- { DMI_MATCH(DMI_SYS_VENDOR, "SHARP"),
- DMI_MATCH(DMI_PRODUCT_NAME, "PC-PJ/AX"),
- DMI_MATCH(DMI_BIOS_VENDOR,"SystemSoft"),
- DMI_MATCH(DMI_BIOS_VERSION,"Version R2.08"), },
- },
- { /* APM crashes */
- apm_is_horked, "Dell Inspiron 2500",
- { DMI_MATCH(DMI_SYS_VENDOR, "Dell Computer Corporation"),
- DMI_MATCH(DMI_PRODUCT_NAME, "Inspiron 2500"),
- DMI_MATCH(DMI_BIOS_VENDOR,"Phoenix Technologies LTD"),
- DMI_MATCH(DMI_BIOS_VERSION,"A11"), },
- },
- { /* APM idle hangs */
- apm_likes_to_melt, "Jabil AMD",
- { DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."),
- DMI_MATCH(DMI_BIOS_VERSION, "0AASNP06"), },
- },
- { /* APM idle hangs */
- apm_likes_to_melt, "AMI Bios",
- { DMI_MATCH(DMI_BIOS_VENDOR, "American Megatrends Inc."),
- DMI_MATCH(DMI_BIOS_VERSION, "0AASNP05"), },
- },
- { /* Handle problems with APM on Sony Vaio PCG-N505X(DE) */
- swab_apm_power_in_minutes, "Sony VAIO",
- { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
- DMI_MATCH(DMI_BIOS_VERSION, "R0206H"),
- DMI_MATCH(DMI_BIOS_DATE, "08/23/99"), },
- },
- { /* Handle problems with APM on Sony Vaio PCG-N505VX */
- swab_apm_power_in_minutes, "Sony VAIO",
- { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
- DMI_MATCH(DMI_BIOS_VERSION, "W2K06H0"),
- DMI_MATCH(DMI_BIOS_DATE, "02/03/00"), },
- },
- { /* Handle problems with APM on Sony Vaio PCG-XG29 */
- swab_apm_power_in_minutes, "Sony VAIO",
- { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
- DMI_MATCH(DMI_BIOS_VERSION, "R0117A0"),
- DMI_MATCH(DMI_BIOS_DATE, "04/25/00"), },
- },
- { /* Handle problems with APM on Sony Vaio PCG-Z600NE */
- swab_apm_power_in_minutes, "Sony VAIO",
- { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
- DMI_MATCH(DMI_BIOS_VERSION, "R0121Z1"),
- DMI_MATCH(DMI_BIOS_DATE, "05/11/00"), },
- },
- { /* Handle problems with APM on Sony Vaio PCG-Z600NE */
- swab_apm_power_in_minutes, "Sony VAIO",
- { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
- DMI_MATCH(DMI_BIOS_VERSION, "WME01Z1"),
- DMI_MATCH(DMI_BIOS_DATE, "08/11/00"), },
- },
- { /* Handle problems with APM on Sony Vaio PCG-Z600LEK(DE) */
- swab_apm_power_in_minutes, "Sony VAIO",
- { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
- DMI_MATCH(DMI_BIOS_VERSION, "R0206Z3"),
- DMI_MATCH(DMI_BIOS_DATE, "12/25/00"), },
- },
- { /* Handle problems with APM on Sony Vaio PCG-Z505LS */
- swab_apm_power_in_minutes, "Sony VAIO",
- { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
- DMI_MATCH(DMI_BIOS_VERSION, "R0203D0"),
- DMI_MATCH(DMI_BIOS_DATE, "05/12/00"), },
- },
- { /* Handle problems with APM on Sony Vaio PCG-Z505LS */
- swab_apm_power_in_minutes, "Sony VAIO",
- { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
- DMI_MATCH(DMI_BIOS_VERSION, "R0203Z3"),
- DMI_MATCH(DMI_BIOS_DATE, "08/25/00"), },
- },
- { /* Handle problems with APM on Sony Vaio PCG-Z505LS (with updated BIOS) */
- swab_apm_power_in_minutes, "Sony VAIO",
- { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
- DMI_MATCH(DMI_BIOS_VERSION, "R0209Z3"),
- DMI_MATCH(DMI_BIOS_DATE, "05/12/01"), },
- },
- { /* Handle problems with APM on Sony Vaio PCG-F104K */
- swab_apm_power_in_minutes, "Sony VAIO",
- { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
- DMI_MATCH(DMI_BIOS_VERSION, "R0204K2"),
- DMI_MATCH(DMI_BIOS_DATE, "08/28/00"), },
- },
-
- { /* Handle problems with APM on Sony Vaio PCG-C1VN/C1VE */
- swab_apm_power_in_minutes, "Sony VAIO",
- { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
- DMI_MATCH(DMI_BIOS_VERSION, "R0208P1"),
- DMI_MATCH(DMI_BIOS_DATE, "11/09/00"), },
- },
- { /* Handle problems with APM on Sony Vaio PCG-C1VE */
- swab_apm_power_in_minutes, "Sony VAIO",
- { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
- DMI_MATCH(DMI_BIOS_VERSION, "R0204P1"),
- DMI_MATCH(DMI_BIOS_DATE, "09/12/00"), },
- },
- { /* Handle problems with APM on Sony Vaio PCG-C1VE */
- swab_apm_power_in_minutes, "Sony VAIO",
- { DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"),
- DMI_MATCH(DMI_BIOS_VERSION, "WXPO1Z3"),
- DMI_MATCH(DMI_BIOS_DATE, "10/26/01"), },
- },
- { /* broken PM poweroff bios */
- set_realmode_power_off, "Award Software v4.60 PGMA",
- { DMI_MATCH(DMI_BIOS_VENDOR, "Award Software International, Inc."),
- DMI_MATCH(DMI_BIOS_VERSION, "4.60 PGMA"),
- DMI_MATCH(DMI_BIOS_DATE, "134526184"), },
- },
-
- /* Generic per vendor APM settings */
-
- { /* Allow interrupts during suspend on IBM laptops */
- set_apm_ints, "IBM",
- { DMI_MATCH(DMI_SYS_VENDOR, "IBM"), },
- },
-
- { }
-};
-
-/*
- * Just start the APM thread. We do NOT want to do APM BIOS
- * calls from anything but the APM thread, if for no other reason
- * than the fact that we don't trust the APM BIOS. This way,
- * most common APM BIOS problems that lead to protection errors
- * etc will have at least some level of being contained...
- *
- * In short, if something bad happens, at least we have a choice
- * of just killing the apm thread..
- */
-static int __init apm_init(void)
-{
- struct proc_dir_entry *apm_proc;
- int ret;
- int i;
-
- dmi_check_system(apm_dmi_table);
-
- if (apm_info.bios.version == 0) {
- printk(KERN_INFO "apm: BIOS not found.\n");
- return -ENODEV;
- }
- printk(KERN_INFO
- "apm: BIOS version %d.%d Flags 0x%02x (Driver version %s)\n",
- ((apm_info.bios.version >> 8) & 0xff),
- (apm_info.bios.version & 0xff),
- apm_info.bios.flags,
- driver_version);
- if ((apm_info.bios.flags & APM_32_BIT_SUPPORT) == 0) {
- printk(KERN_INFO "apm: no 32 bit BIOS support\n");
- return -ENODEV;
- }
-
- if (allow_ints)
- apm_info.allow_ints = 1;
- if (broken_psr)
- apm_info.get_power_status_broken = 1;
- if (realmode_power_off)
- apm_info.realmode_power_off = 1;
- /* User can override, but default is to trust DMI */
- if (apm_disabled != -1)
- apm_info.disabled = apm_disabled;
-
- /*
- * Fix for the Compaq Contura 3/25c which reports BIOS version 0.1
- * but is reportedly a 1.0 BIOS.
- */
- if (apm_info.bios.version == 0x001)
- apm_info.bios.version = 0x100;
-
- /* BIOS < 1.2 doesn't set cseg_16_len */
- if (apm_info.bios.version < 0x102)
- apm_info.bios.cseg_16_len = 0; /* 64k */
-
- if (debug) {
- printk(KERN_INFO "apm: entry %x:%lx cseg16 %x dseg %x",
- apm_info.bios.cseg, apm_info.bios.offset,
- apm_info.bios.cseg_16, apm_info.bios.dseg);
- if (apm_info.bios.version > 0x100)
- printk(" cseg len %x, dseg len %x",
- apm_info.bios.cseg_len,
- apm_info.bios.dseg_len);
- if (apm_info.bios.version > 0x101)
- printk(" cseg16 len %x", apm_info.bios.cseg_16_len);
- printk("\n");
- }
-
- if (apm_info.disabled) {
- printk(KERN_NOTICE "apm: disabled on user request.\n");
- return -ENODEV;
- }
- if ((num_online_cpus() > 1) && !power_off && !smp) {
- printk(KERN_NOTICE "apm: disabled - APM is not SMP safe.\n");
- apm_info.disabled = 1;
- return -ENODEV;
- }
- if (PM_IS_ACTIVE()) {
- printk(KERN_NOTICE "apm: overridden by ACPI.\n");
- apm_info.disabled = 1;
- return -ENODEV;
- }
- pm_active = 1;
-
- /*
- * Set up a segment that references the real mode segment 0x40
- * that extends up to the end of page zero (that we have reserved).
- * This is for buggy BIOS's that refer to (real mode) segment 0x40
- * even though they are called in protected mode.
- */
- set_base(bad_bios_desc, __va((unsigned long)0x40 << 4));
- _set_limit((char *)&bad_bios_desc, 4095 - (0x40 << 4));
-
- apm_bios_entry.offset = apm_info.bios.offset;
- apm_bios_entry.segment = APM_CS;
-
- for (i = 0; i < NR_CPUS; i++) {
- set_base(get_cpu_gdt_table(i)[APM_CS >> 3],
- __va((unsigned long)apm_info.bios.cseg << 4));
- set_base(get_cpu_gdt_table(i)[APM_CS_16 >> 3],
- __va((unsigned long)apm_info.bios.cseg_16 << 4));
- set_base(get_cpu_gdt_table(i)[APM_DS >> 3],
- __va((unsigned long)apm_info.bios.dseg << 4));
-#ifndef APM_RELAX_SEGMENTS
- if (apm_info.bios.version == 0x100) {
-#endif
- /* For ASUS motherboard, Award BIOS rev 110 (and others?) */
- _set_limit((char *)&get_cpu_gdt_table(i)[APM_CS >> 3], 64 * 1024 - 1);
- /* For some unknown machine. */
- _set_limit((char *)&get_cpu_gdt_table(i)[APM_CS_16 >> 3], 64 * 1024 - 1);
- /* For the DEC Hinote Ultra CT475 (and others?) */
- _set_limit((char *)&get_cpu_gdt_table(i)[APM_DS >> 3], 64 * 1024 - 1);
-#ifndef APM_RELAX_SEGMENTS
- } else {
- _set_limit((char *)&get_cpu_gdt_table(i)[APM_CS >> 3],
- (apm_info.bios.cseg_len - 1) & 0xffff);
- _set_limit((char *)&get_cpu_gdt_table(i)[APM_CS_16 >> 3],
- (apm_info.bios.cseg_16_len - 1) & 0xffff);
- _set_limit((char *)&get_cpu_gdt_table(i)[APM_DS >> 3],
- (apm_info.bios.dseg_len - 1) & 0xffff);
- /* workaround for broken BIOSes */
- if (apm_info.bios.cseg_len <= apm_info.bios.offset)
- _set_limit((char *)&get_cpu_gdt_table(i)[APM_CS >> 3], 64 * 1024 -1);
- if (apm_info.bios.dseg_len <= 0x40) { /* 0x40 * 4kB == 64kB */
- /* for the BIOS that assumes granularity = 1 */
- get_cpu_gdt_table(i)[APM_DS >> 3].b |= 0x800000;
- printk(KERN_NOTICE "apm: we set the granularity of dseg.\n");
- }
- }
-#endif
- }
-
- apm_proc = create_proc_info_entry("apm", 0, NULL, apm_get_info);
- if (apm_proc)
- apm_proc->owner = THIS_MODULE;
-
- ret = kernel_thread(apm, NULL, CLONE_KERNEL | SIGCHLD);
- if (ret < 0) {
- printk(KERN_ERR "apm: disabled - Unable to start kernel thread.\n");
- return -ENOMEM;
- }
-
- if (num_online_cpus() > 1 && !smp ) {
- printk(KERN_NOTICE
- "apm: disabled - APM is not SMP safe (power off active).\n");
- return 0;
- }
-
- misc_register(&apm_device);
-
- if (HZ != 100)
- idle_period = (idle_period * HZ) / 100;
- if (idle_threshold < 100) {
- original_pm_idle = pm_idle;
- pm_idle = apm_cpu_idle;
- set_pm_idle = 1;
- }
-
- return 0;
-}
-
-static void __exit apm_exit(void)
-{
- int error;
-
- if (set_pm_idle) {
- pm_idle = original_pm_idle;
- /*
- * We are about to unload the current idle thread pm callback
- * (pm_idle), Wait for all processors to update cached/local
- * copies of pm_idle before proceeding.
- */
- cpu_idle_wait();
- }
- if (((apm_info.bios.flags & APM_BIOS_DISENGAGED) == 0)
- && (apm_info.connection_version > 0x0100)) {
- error = apm_engage_power_management(APM_DEVICE_ALL, 0);
- if (error)
- apm_error("disengage power management", error);
- }
- misc_deregister(&apm_device);
- remove_proc_entry("apm", NULL);
- if (power_off)
- pm_power_off = NULL;
- exit_kapmd = 1;
- while (kapmd_running)
- schedule();
- pm_active = 0;
-}
-
-module_init(apm_init);
-module_exit(apm_exit);
-
-MODULE_AUTHOR("Stephen Rothwell");
-MODULE_DESCRIPTION("Advanced Power Management");
-MODULE_LICENSE("GPL");
-module_param(debug, bool, 0644);
-MODULE_PARM_DESC(debug, "Enable debug mode");
-module_param(power_off, bool, 0444);
-MODULE_PARM_DESC(power_off, "Enable power off");
-module_param(bounce_interval, int, 0444);
-MODULE_PARM_DESC(bounce_interval,
- "Set the number of ticks to ignore suspend bounces");
-module_param(allow_ints, bool, 0444);
-MODULE_PARM_DESC(allow_ints, "Allow interrupts during BIOS calls");
-module_param(broken_psr, bool, 0444);
-MODULE_PARM_DESC(broken_psr, "BIOS has a broken GetPowerStatus call");
-module_param(realmode_power_off, bool, 0444);
-MODULE_PARM_DESC(realmode_power_off,
- "Switch to real mode before powering off");
-module_param(idle_threshold, int, 0444);
-MODULE_PARM_DESC(idle_threshold,
- "System idle percentage above which to make APM BIOS idle calls");
-module_param(idle_period, int, 0444);
-MODULE_PARM_DESC(idle_period,
- "Period (in sec/100) over which to caculate the idle percentage");
-module_param(smp, bool, 0444);
-MODULE_PARM_DESC(smp,
- "Set this to enable APM use on an SMP platform. Use with caution on older systems");
-MODULE_ALIAS_MISCDEV(APM_MINOR_DEV);
struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {};
-extern void mcheck_init(struct cpuinfo_x86 *c);
-
extern void machine_specific_modify_cpu_capabilities(struct cpuinfo_x86 *c);
extern int disable_pse;
cpuid(0x00000001, &tfms, &misc, &junk, &cap0);
c->x86 = (tfms >> 8) & 15;
c->x86_model = (tfms >> 4) & 15;
- if (c->x86 == 0xf) {
+ if (c->x86 == 0xf)
c->x86 += (tfms >> 20) & 0xff;
+ if (c->x86 >= 0x6)
c->x86_model += ((tfms >> 16) & 0xF) << 4;
- }
c->x86_mask = tfms & 15;
if (cap0 & (1<<19))
c->x86_cache_alignment = ((misc >> 8) & 0xff) * 8;
c->x86_model = c->x86_mask = 0; /* So far unknown... */
c->x86_vendor_id[0] = '\0'; /* Unset */
c->x86_model_id[0] = '\0'; /* Unset */
- c->x86_num_cores = 1;
+ c->x86_max_cores = 1;
memset(&c->x86_capability, 0, sizeof c->x86_capability);
if (!have_cpuid_p()) {
}
/* Init Machine Check Exception if available. */
-#ifdef CONFIG_X86_MCE
mcheck_init(c);
-#endif
+
if (c == &boot_cpu_data)
sysenter_setup();
enable_sep_cpu();
void __devinit detect_ht(struct cpuinfo_x86 *c)
{
u32 eax, ebx, ecx, edx;
- int index_msb, tmp;
+ int index_msb, core_bits;
int cpu = smp_processor_id();
+ cpuid(1, &eax, &ebx, &ecx, &edx);
+
+ c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0);
+
if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY))
return;
- cpuid(1, &eax, &ebx, &ecx, &edx);
smp_num_siblings = (ebx & 0xff0000) >> 16;
if (smp_num_siblings == 1) {
printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
} else if (smp_num_siblings > 1 ) {
- index_msb = 31;
if (smp_num_siblings > NR_CPUS) {
printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", smp_num_siblings);
smp_num_siblings = 1;
return;
}
- tmp = smp_num_siblings;
- while ((tmp & 0x80000000 ) == 0) {
- tmp <<=1 ;
- index_msb--;
- }
- if (smp_num_siblings & (smp_num_siblings - 1))
- index_msb++;
+
+ index_msb = get_count_order(smp_num_siblings);
phys_proc_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb);
printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
phys_proc_id[cpu]);
- smp_num_siblings = smp_num_siblings / c->x86_num_cores;
+ smp_num_siblings = smp_num_siblings / c->x86_max_cores;
- tmp = smp_num_siblings;
- index_msb = 31;
- while ((tmp & 0x80000000) == 0) {
- tmp <<=1 ;
- index_msb--;
- }
+ index_msb = get_count_order(smp_num_siblings) ;
- if (smp_num_siblings & (smp_num_siblings - 1))
- index_msb++;
+ core_bits = get_count_order(c->x86_max_cores);
- cpu_core_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb);
+ cpu_core_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb) &
+ ((1 << core_bits) - 1);
- if (c->x86_num_cores > 1)
+ if (c->x86_max_cores > 1)
printk(KERN_INFO "CPU: Processor Core ID: %d\n",
cpu_core_id[cpu]);
}
set_in_cr4(X86_CR4_TSD);
}
- /*
- * Set up the per-thread TLS descriptor cache:
- */
- memcpy(thread->tls_array, &get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN],
- GDT_ENTRY_TLS_ENTRIES * 8);
-
cpu_gdt_init(&cpu_gdt_descr[cpu]);
/*
nmi_debug_stack_check:
cmpw $__KERNEL_CS,16(%esp)
jne nmi_stack_correct
- cmpl $debug - 1,(%esp)
- jle nmi_stack_correct
+ cmpl $debug,(%esp)
+ jb nmi_stack_correct
cmpl $debug_esp_fix_insn,(%esp)
- jle nmi_debug_stack_fixup
-nmi_debug_stack_fixup:
+ ja nmi_stack_correct
FIX_STACK(24,nmi_stack_correct, 1)
jmp nmi_stack_correct
int (*ioapic_renumber_irq)(int ioapic, int irq);
atomic_t irq_mis_count;
+/* Where if anywhere is the i8259 connect in external int mode */
+static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
+
static DEFINE_SPINLOCK(ioapic_lock);
/*
}
#ifndef CONFIG_XEN
+#endif
/*
* Find the pin to which IRQ[irq] (ISA) is connected
*/
-static int find_isa_irq_pin(int irq, int type)
+static int __init find_isa_irq_pin(int irq, int type)
{
int i;
}
return -1;
}
-#endif
+
+static int __init find_isa_irq_apic(int irq, int type)
+{
+ int i;
+
+ for (i = 0; i < mp_irq_entries; i++) {
+ int lbus = mp_irqs[i].mpc_srcbus;
+
+ if ((mp_bus_id_to_type[lbus] == MP_BUS_ISA ||
+ mp_bus_id_to_type[lbus] == MP_BUS_EISA ||
+ mp_bus_id_to_type[lbus] == MP_BUS_MCA ||
+ mp_bus_id_to_type[lbus] == MP_BUS_NEC98
+ ) &&
+ (mp_irqs[i].mpc_irqtype == type) &&
+ (mp_irqs[i].mpc_srcbusirq == irq))
+ break;
+ }
+ if (i < mp_irq_entries) {
+ int apic;
+ for(apic = 0; apic < nr_ioapics; apic++) {
+ if (mp_ioapics[apic].mpc_apicid == mp_irqs[i].mpc_dstapic)
+ return apic;
+ }
+ }
+
+ return -1;
+}
/*
* Find a specific PCI IRQ entry.
* Set up the 8259A-master output pin:
*/
#ifndef CONFIG_XEN
-static void __init setup_ExtINT_IRQ0_pin(unsigned int pin, int vector)
+static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, int vector)
{
struct IO_APIC_route_entry entry;
unsigned long flags;
* Add it to the IO-APIC irq-routing table:
*/
spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(0, 0x11+2*pin, *(((int *)&entry)+1));
- io_apic_write(0, 0x10+2*pin, *(((int *)&entry)+0));
+ io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
+ io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
spin_unlock_irqrestore(&ioapic_lock, flags);
enable_8259A_irq(0);
static void __init enable_IO_APIC(void)
{
union IO_APIC_reg_01 reg_01;
- int i;
+ int i8259_apic, i8259_pin;
+ int i, apic;
unsigned long flags;
for (i = 0; i < PIN_MAP_SIZE; i++) {
/*
* The number of IO-APIC IRQ registers (== #pins):
*/
- for (i = 0; i < nr_ioapics; i++) {
+ for (apic = 0; apic < nr_ioapics; apic++) {
spin_lock_irqsave(&ioapic_lock, flags);
- reg_01.raw = io_apic_read(i, 1);
+ reg_01.raw = io_apic_read(apic, 1);
spin_unlock_irqrestore(&ioapic_lock, flags);
- nr_ioapic_registers[i] = reg_01.bits.entries+1;
+ nr_ioapic_registers[apic] = reg_01.bits.entries+1;
+ }
+ for(apic = 0; apic < nr_ioapics; apic++) {
+ int pin;
+ /* See if any of the pins is in ExtINT mode */
+ for(pin = 0; pin < nr_ioapic_registers[i]; pin++) {
+ struct IO_APIC_route_entry entry;
+ spin_lock_irqsave(&ioapic_lock, flags);
+ *(((int *)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
+ *(((int *)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
+ spin_unlock_irqrestore(&ioapic_lock, flags);
+
+
+ /* If the interrupt line is enabled and in ExtInt mode
+ * I have found the pin where the i8259 is connected.
+ */
+ if ((entry.mask == 0) && (entry.delivery_mode == dest_ExtINT)) {
+ ioapic_i8259.apic = apic;
+ ioapic_i8259.pin = pin;
+ goto found_i8259;
+ }
+ }
+ }
+ found_i8259:
+ /* Look to see what if the MP table has reported the ExtINT */
+ /* If we could not find the appropriate pin by looking at the ioapic
+ * the i8259 probably is not connected the ioapic but give the
+ * mptable a chance anyway.
+ */
+ i8259_pin = find_isa_irq_pin(0, mp_ExtINT);
+ i8259_apic = find_isa_irq_apic(0, mp_ExtINT);
+ /* Trust the MP table if nothing is setup in the hardware */
+ if ((ioapic_i8259.pin == -1) && (i8259_pin >= 0)) {
+ printk(KERN_WARNING "ExtINT not setup in hardware but reported by MP table\n");
+ ioapic_i8259.pin = i8259_pin;
+ ioapic_i8259.apic = i8259_apic;
+ }
+ /* Complain if the MP table and the hardware disagree */
+ if (((ioapic_i8259.apic != i8259_apic) || (ioapic_i8259.pin != i8259_pin)) &&
+ (i8259_pin >= 0) && (ioapic_i8259.pin >= 0))
+ {
+ printk(KERN_WARNING "ExtINT in hardware and MP table differ\n");
}
/*
*/
void disable_IO_APIC(void)
{
-#ifndef CONFIG_XEN
- int pin;
-#endif
/*
* Clear the IO-APIC before rebooting:
*/
* Put that IOAPIC in virtual wire mode
* so legacy interrupts can be delivered.
*/
- pin = find_isa_irq_pin(0, mp_ExtINT);
- if (pin != -1) {
+ if (ioapic_i8259.pin != -1) {
struct IO_APIC_route_entry entry;
unsigned long flags;
entry.polarity = 0; /* High */
entry.delivery_status = 0;
entry.dest_mode = 0; /* Physical */
- entry.delivery_mode = 7; /* ExtInt */
+ entry.delivery_mode = dest_ExtINT; /* ExtInt */
entry.vector = 0;
entry.dest.physical.physical_dest = 0;
* Add it to the IO-APIC irq-routing table:
*/
spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(0, 0x11+2*pin, *(((int *)&entry)+1));
- io_apic_write(0, 0x10+2*pin, *(((int *)&entry)+0));
+ io_apic_write(ioapic_i8259.apic, 0x11+2*ioapic_i8259.pin,
+ *(((int *)&entry)+1));
+ io_apic_write(ioapic_i8259.apic, 0x10+2*ioapic_i8259.pin,
+ *(((int *)&entry)+0));
spin_unlock_irqrestore(&ioapic_lock, flags);
}
- disconnect_bsp_APIC(pin != -1);
+ disconnect_bsp_APIC(ioapic_i8259.pin != -1);
#endif
}
{
int irq = vector_to_irq(vector);
- move_irq(vector);
+ move_native_irq(vector);
ack_edge_ioapic_irq(irq);
}
{
int irq = vector_to_irq(vector);
- move_irq(vector);
+ move_native_irq(vector);
end_level_ioapic_irq(irq);
}
*/
static inline void unlock_ExtINT_logic(void)
{
- int pin, i;
+ int apic, pin, i;
struct IO_APIC_route_entry entry0, entry1;
unsigned char save_control, save_freq_select;
unsigned long flags;
- pin = find_isa_irq_pin(8, mp_INT);
+ pin = find_isa_irq_pin(8, mp_INT);
+ apic = find_isa_irq_apic(8, mp_INT);
if (pin == -1)
return;
spin_lock_irqsave(&ioapic_lock, flags);
- *(((int *)&entry0) + 1) = io_apic_read(0, 0x11 + 2 * pin);
- *(((int *)&entry0) + 0) = io_apic_read(0, 0x10 + 2 * pin);
+ *(((int *)&entry0) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
+ *(((int *)&entry0) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
spin_unlock_irqrestore(&ioapic_lock, flags);
- clear_IO_APIC_pin(0, pin);
+ clear_IO_APIC_pin(apic, pin);
memset(&entry1, 0, sizeof(entry1));
entry1.vector = 0;
spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(0, 0x11 + 2 * pin, *(((int *)&entry1) + 1));
- io_apic_write(0, 0x10 + 2 * pin, *(((int *)&entry1) + 0));
+ io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry1) + 1));
+ io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry1) + 0));
spin_unlock_irqrestore(&ioapic_lock, flags);
save_control = CMOS_READ(RTC_CONTROL);
CMOS_WRITE(save_control, RTC_CONTROL);
CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
- clear_IO_APIC_pin(0, pin);
+ clear_IO_APIC_pin(apic, pin);
spin_lock_irqsave(&ioapic_lock, flags);
- io_apic_write(0, 0x11 + 2 * pin, *(((int *)&entry0) + 1));
- io_apic_write(0, 0x10 + 2 * pin, *(((int *)&entry0) + 0));
+ io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry0) + 1));
+ io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry0) + 0));
spin_unlock_irqrestore(&ioapic_lock, flags);
}
*/
static inline void check_timer(void)
{
- int pin1, pin2;
+ int apic1, pin1, apic2, pin2;
int vector;
/*
timer_ack = 1;
enable_8259A_irq(0);
- pin1 = find_isa_irq_pin(0, mp_INT);
- pin2 = find_isa_irq_pin(0, mp_ExtINT);
+ pin1 = find_isa_irq_pin(0, mp_INT);
+ apic1 = find_isa_irq_apic(0, mp_INT);
+ pin2 = ioapic_i8259.pin;
+ apic2 = ioapic_i8259.apic;
- printk(KERN_INFO "..TIMER: vector=0x%02X pin1=%d pin2=%d\n", vector, pin1, pin2);
+ printk(KERN_INFO "..TIMER: vector=0x%02X apic1=%d pin1=%d apic2=%d pin2=%d\n",
+ vector, apic1, pin1, apic2, pin2);
if (pin1 != -1) {
/*
clear_IO_APIC_pin(0, pin1);
return;
}
- clear_IO_APIC_pin(0, pin1);
- printk(KERN_ERR "..MP-BIOS bug: 8254 timer not connected to IO-APIC\n");
+ clear_IO_APIC_pin(apic1, pin1);
+ printk(KERN_ERR "..MP-BIOS bug: 8254 timer not connected to "
+ "IO-APIC\n");
}
printk(KERN_INFO "...trying to set up timer (IRQ0) through the 8259A ... ");
/*
* legacy devices should be connected to IO APIC #0
*/
- setup_ExtINT_IRQ0_pin(pin2, vector);
+ setup_ExtINT_IRQ0_pin(apic2, pin2, vector);
if (timer_irq_works()) {
printk("works.\n");
if (pin1 != -1)
- replace_pin_at_irq(0, 0, pin1, 0, pin2);
+ replace_pin_at_irq(0, apic1, pin1, apic2, pin2);
else
- add_pin_to_irq(0, 0, pin2);
+ add_pin_to_irq(0, apic2, pin2);
if (nmi_watchdog == NMI_IO_APIC) {
setup_nmi();
}
/*
* Cleanup, just in case ...
*/
- clear_IO_APIC_pin(0, pin2);
+ clear_IO_APIC_pin(apic2, pin2);
}
printk(" failed.\n");
if (i == 0) {
seq_printf(p, " ");
- for_each_cpu(j)
+ for_each_online_cpu(j)
seq_printf(p, "CPU%d ",j);
seq_putc(p, '\n');
}
#ifndef CONFIG_SMP
seq_printf(p, "%10u ", kstat_irqs(i));
#else
- for_each_cpu(j)
+ for_each_online_cpu(j)
seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
#endif
seq_printf(p, " %14s", irq_desc[i].handler->typename);
spin_unlock_irqrestore(&irq_desc[i].lock, flags);
} else if (i == NR_IRQS) {
seq_printf(p, "NMI: ");
- for_each_cpu(j)
+ for_each_online_cpu(j)
seq_printf(p, "%10u ", nmi_count(j));
seq_putc(p, '\n');
#ifdef CONFIG_X86_LOCAL_APIC
seq_printf(p, "LOC: ");
- for_each_cpu(j)
+ for_each_online_cpu(j)
seq_printf(p, "%10u ",
per_cpu(irq_stat,j).apic_timer_irqs);
seq_putc(p, '\n');
#include <asm/system.h>
#include <asm/ldt.h>
#include <asm/desc.h>
+#include <asm/mmu_context.h>
#ifdef CONFIG_SMP /* avoids "defined but not used" warnig */
static void flush_ldt(void *null)
/* Processor that is doing the boot up */
unsigned int boot_cpu_physical_apicid = -1U;
/* Internal processor count */
-static unsigned int __initdata num_processors;
+static unsigned int __devinitdata num_processors;
/* Bitmask of physically existing CPUs */
physid_mask_t phys_cpu_present_map;
#endif
#ifndef CONFIG_XEN
-static void __init MP_processor_info (struct mpc_config_processor *m)
+static void __devinit MP_processor_info (struct mpc_config_processor *m)
{
int ver, apicid;
physid_mask_t phys_cpu;
boot_cpu_physical_apicid = m->mpc_apicid;
}
- if (num_processors >= NR_CPUS) {
- printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
- " Processor ignored.\n", NR_CPUS);
- return;
- }
-
- if (num_processors >= maxcpus) {
- printk(KERN_WARNING "WARNING: maxcpus limit of %i reached."
- " Processor ignored.\n", maxcpus);
- return;
- }
ver = m->mpc_apicver;
if (!MP_valid_apicid(apicid, ver)) {
return;
}
- cpu_set(num_processors, cpu_possible_map);
- num_processors++;
- phys_cpu = apicid_to_cpu_present(apicid);
- physids_or(phys_cpu_present_map, phys_cpu_present_map, phys_cpu);
-
/*
* Validate version
*/
ver = 0x10;
}
apic_version[m->mpc_apicid] = ver;
+
+ phys_cpu = apicid_to_cpu_present(apicid);
+ physids_or(phys_cpu_present_map, phys_cpu_present_map, phys_cpu);
+
+ if (num_processors >= NR_CPUS) {
+ printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
+ " Processor ignored.\n", NR_CPUS);
+ return;
+ }
+
+ if (num_processors >= maxcpus) {
+ printk(KERN_WARNING "WARNING: maxcpus limit of %i reached."
+ " Processor ignored.\n", maxcpus);
+ return;
+ }
+
+ cpu_set(num_processors, cpu_possible_map);
+ num_processors++;
+
if ((num_processors > 8) &&
- APIC_XAPIC(ver) &&
- (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL))
+ ((APIC_XAPIC(ver) &&
+ (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)) ||
+ (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)))
def_to_bigsmp = 1;
else
def_to_bigsmp = 0;
}
-void __init mp_register_lapic (
+void __devinit mp_register_lapic (
u8 id,
u8 enabled)
{
if (need_resched()) {
local_irq_enable();
} else {
+ clear_thread_flag(TIF_POLLING_NRFLAG);
+ smp_mb__after_clear_bit();
stop_hz_timer();
/* Blocking includes an implicit local_irq_enable(). */
HYPERVISOR_sched_op(SCHEDOP_block, 0);
start_hz_timer();
+ set_thread_flag(TIF_POLLING_NRFLAG);
}
}
#ifdef CONFIG_APM_MODULE
EXPORT_SYMBOL(default_idle);
#endif
+#ifdef CONFIG_HOTPLUG_CPU
+static inline void play_dead(void)
+{
+ HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL);
+ local_irq_enable();
+}
+#else
+static inline void play_dead(void)
+{
+ BUG();
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
/*
* The idle thread. There's no useful work to be
* done, so just try to conserve power and have a
*/
void cpu_idle(void)
{
-#if defined(CONFIG_HOTPLUG_CPU)
- int cpu = raw_smp_processor_id();
-#endif
+ int cpu = smp_processor_id();
+
+ set_thread_flag(TIF_POLLING_NRFLAG);
/* endless idle loop with no priority at all */
while (1) {
rmb();
-#if defined(CONFIG_HOTPLUG_CPU)
- if (cpu_is_offline(cpu)) {
- HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL);
- local_irq_enable();
- }
-#endif
+ if (cpu_is_offline(cpu))
+ play_dead();
__get_cpu_var(irq_stat).idle_timestamp = jiffies;
xen_idle();
}
+ preempt_enable_no_resched();
schedule();
+ preempt_disable();
}
}
void show_regs(struct pt_regs * regs)
{
+ unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L;
+
printk("\n");
printk("Pid: %d, comm: %20s\n", current->pid, current->comm);
printk("EIP: %04x:[<%08lx>] CPU: %d\n",0xffff & regs->xcs,regs->eip, smp_processor_id());
printk(" DS: %04x ES: %04x\n",
0xffff & regs->xds,0xffff & regs->xes);
+ cr0 = read_cr0();
+ cr2 = read_cr2();
+ cr3 = read_cr3();
+ if (current_cpu_data.x86 > 4) {
+ cr4 = read_cr4();
+ }
+ printk("CR0: %08lx CR2: %08lx CR3: %08lx CR4: %08lx\n", cr0, cr2, cr3, cr4);
show_trace(NULL, ®s->esp);
}
{
struct task_struct *tsk = current;
- /*
- * Remove function-return probe instances associated with this task
- * and put them back on the free list. Do not insert an exit probe for
- * this function, it will be disabled by kprobe_flush_task if you do.
- */
- kprobe_flush_task(tsk);
-
memset(tsk->thread.debugreg, 0, sizeof(unsigned long)*8);
memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
/*
struct pt_regs ptregs;
ptregs = *(struct pt_regs *)
- ((unsigned long)tsk->thread_info+THREAD_SIZE - sizeof(ptregs));
+ ((unsigned long)tsk->thread_info +
+ /* see comments in copy_thread() about -8 */
+ THREAD_SIZE - sizeof(ptregs) - 8);
ptregs.xcs &= 0xffff;
ptregs.xds &= 0xffff;
ptregs.xes &= 0xffff;
elf_core_copy_regs(regs, &ptregs);
- boot_option_idle_override = 1;
return 1;
}
EXPORT_SYMBOL(drive_info);
#endif
struct screen_info screen_info;
-#ifdef CONFIG_VT
EXPORT_SYMBOL(screen_info);
-#endif
struct apm_info apm_info;
EXPORT_SYMBOL(apm_info);
struct sys_desc_table_struct {
}
}
for (i = 0; i < e820.nr_map; i++) {
- if (e820.map[i].type == E820_RAM) {
- current_addr = e820.map[i].addr + e820.map[i].size;
- if (current_addr >= size) {
- e820.map[i].size -= current_addr-size;
- e820.nr_map = i + 1;
- return;
- }
+ current_addr = e820.map[i].addr + e820.map[i].size;
+ if (current_addr < size)
+ continue;
+
+ if (e820.map[i].type != E820_RAM)
+ continue;
+
+ if (e820.map[i].addr >= size) {
+ /*
+ * This region starts past the end of the
+ * requested size, skip it completely.
+ */
+ e820.nr_map = i;
+ } else {
+ e820.nr_map = i + 1;
+ e820.map[i].size -= current_addr - size;
}
+ return;
}
}
/* Package ID of each logical CPU */
int phys_proc_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID};
-EXPORT_SYMBOL(phys_proc_id);
/* Core ID of each logical CPU */
int cpu_core_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID};
-EXPORT_SYMBOL(cpu_core_id);
+/* representing HT siblings of each logical CPU */
cpumask_t cpu_sibling_map[NR_CPUS] __read_mostly;
EXPORT_SYMBOL(cpu_sibling_map);
+/* representing HT and core siblings of each logical CPU */
cpumask_t cpu_core_map[NR_CPUS] __read_mostly;
EXPORT_SYMBOL(cpu_core_map);
cpumask_t cpu_callin_map;
cpumask_t cpu_callout_map;
EXPORT_SYMBOL(cpu_callout_map);
+#ifdef CONFIG_HOTPLUG_CPU
+cpumask_t cpu_possible_map = CPU_MASK_ALL;
+#else
cpumask_t cpu_possible_map;
+#endif
EXPORT_SYMBOL(cpu_possible_map);
static cpumask_t smp_commenced_mask;
static int cpucount;
+/* representing cpus for which sibling maps can be computed */
+static cpumask_t cpu_sibling_setup_map;
+
static inline void
set_cpu_sibling_map(int cpu)
{
int i;
+ struct cpuinfo_x86 *c = cpu_data;
+
+ cpu_set(cpu, cpu_sibling_setup_map);
if (smp_num_siblings > 1) {
- for (i = 0; i < NR_CPUS; i++) {
- if (!cpu_isset(i, cpu_callout_map))
- continue;
- if (cpu_core_id[cpu] == cpu_core_id[i]) {
+ for_each_cpu_mask(i, cpu_sibling_setup_map) {
+ if (phys_proc_id[cpu] == phys_proc_id[i] &&
+ cpu_core_id[cpu] == cpu_core_id[i]) {
cpu_set(i, cpu_sibling_map[cpu]);
cpu_set(cpu, cpu_sibling_map[i]);
+ cpu_set(i, cpu_core_map[cpu]);
+ cpu_set(cpu, cpu_core_map[i]);
}
}
} else {
cpu_set(cpu, cpu_sibling_map[cpu]);
}
- if (current_cpu_data.x86_num_cores > 1) {
- for (i = 0; i < NR_CPUS; i++) {
- if (!cpu_isset(i, cpu_callout_map))
- continue;
- if (phys_proc_id[cpu] == phys_proc_id[i]) {
- cpu_set(i, cpu_core_map[cpu]);
- cpu_set(cpu, cpu_core_map[i]);
- }
- }
- } else {
+ if (current_cpu_data.x86_max_cores == 1) {
cpu_core_map[cpu] = cpu_sibling_map[cpu];
+ c[cpu].booted_cores = 1;
+ return;
+ }
+
+ for_each_cpu_mask(i, cpu_sibling_setup_map) {
+ if (phys_proc_id[cpu] == phys_proc_id[i]) {
+ cpu_set(i, cpu_core_map[cpu]);
+ cpu_set(cpu, cpu_core_map[i]);
+ /*
+ * Does this new cpu bringup a new core?
+ */
+ if (cpus_weight(cpu_sibling_map[cpu]) == 1) {
+ /*
+ * for each core in package, increment
+ * the booted_cores for this new cpu
+ */
+ if (first_cpu(cpu_sibling_map[i]) == i)
+ c[cpu].booted_cores++;
+ /*
+ * increment the core count for all
+ * the other cpus in this package
+ */
+ if (i != cpu)
+ c[i].booted_cores++;
+ } else if (i != cpu && !c[cpu].booted_cores)
+ c[cpu].booted_cores = c[i].booted_cores;
+ }
}
}
* things done here to the most necessary things.
*/
cpu_init();
+ preempt_disable();
smp_callin();
while (!cpu_isset(smp_processor_id(), smp_commenced_mask))
rep_nop();
printk("Inquiring remote APIC #%d...\n", apicid);
- for (i = 0; i < sizeof(regs) / sizeof(*regs); i++) {
+ for (i = 0; i < ARRAY_SIZE(regs); i++) {
printk("... APIC #%d %s: ", apicid, names[i]);
/*
current_thread_info()->cpu = 0;
smp_tune_scheduling();
- cpus_clear(cpu_sibling_map[0]);
- cpu_set(0, cpu_sibling_map[0]);
- cpus_clear(cpu_core_map[0]);
- cpu_set(0, cpu_core_map[0]);
+ set_cpu_sibling_map(0);
/*
* If we couldn't find an SMP configuration at boot time,
remove_siblinginfo(int cpu)
{
int sibling;
+ struct cpuinfo_x86 *c = cpu_data;
+ for_each_cpu_mask(sibling, cpu_core_map[cpu]) {
+ cpu_clear(cpu, cpu_core_map[sibling]);
+ /*
+ * last thread sibling in this cpu core going down
+ */
+ if (cpus_weight(cpu_sibling_map[cpu]) == 1)
+ c[sibling].booted_cores--;
+ }
+
for_each_cpu_mask(sibling, cpu_sibling_map[cpu])
cpu_clear(cpu, cpu_sibling_map[sibling]);
- for_each_cpu_mask(sibling, cpu_core_map[cpu])
- cpu_clear(cpu, cpu_core_map[sibling]);
cpus_clear(cpu_sibling_map[cpu]);
cpus_clear(cpu_core_map[cpu]);
phys_proc_id[cpu] = BAD_APICID;
cpu_core_id[cpu] = BAD_APICID;
+ cpu_clear(cpu, cpu_sibling_setup_map);
}
int __cpu_disable(void)
if (cpu == 0)
return -EBUSY;
- /* We enable the timer again on the exit path of the death loop */
- disable_APIC_timer();
+ clear_local_APIC();
/* Allow any queued timer interrupts to get serviced */
local_irq_enable();
mdelay(1);
#include <asm/arch_hooks.h>
-#include "io_ports.h"
-
#include <xen/evtchn.h>
#if defined (__i386__)
int pit_latch_buggy; /* extern */
-u64 jiffies_64 = INITIAL_JIFFIES;
-
-EXPORT_SYMBOL(jiffies_64);
-
#if defined(__x86_64__)
unsigned long vxtime_hz = PIT_TICK_RATE;
struct vxtime_data __vxtime __section_vxtime; /* for vsyscalls */
cpu = smp_processor_id();
-#ifdef CONFIG_HOTPLUG_CPU
- if (!cpu_online(cpu)) {
- nmi_exit();
- return;
- }
-#endif
-
++nmi_count(cpu);
if (!rcu_dereference(nmi_callback)(regs, cpu))
tss->io_bitmap_max - thread->io_bitmap_max);
tss->io_bitmap_max = thread->io_bitmap_max;
tss->io_bitmap_base = IO_BITMAP_OFFSET;
+ tss->io_bitmap_owner = thread;
put_cpu();
return;
}
cpu = smp_processor_id();
-#ifdef CONFIG_HOTPLUG_CPU
- if (!cpu_online(cpu)) {
- nmi_exit();
- return;
- }
-#endif
-
++nmi_count(cpu);
if (!rcu_dereference(nmi_callback)(regs, cpu))
#
obj-y := setup.o topology.o
-
+
topology-y := ../mach-default/topology.o
#include <linux/highmem.h>
#include <linux/module.h>
#include <linux/kprobes.h>
-#include <linux/percpu.h>
#include <asm/system.h>
#include <asm/uaccess.h>
extern void die(const char *,struct pt_regs *,long);
-DEFINE_PER_CPU(pgd_t *, cur_pgd);
-
/*
* Unlock any spinlocks which will prevent us from getting the
* message out
desc = (void *)desc + (seg & ~7);
} else {
/* Must disable preemption while reading the GDT. */
- desc = (u32 *)get_cpu_gdt_table(get_cpu());
+ desc = (u32 *)get_cpu_gdt_table(get_cpu());
desc = (void *)desc + (seg & ~7);
}
unsigned long *p, page;
unsigned long mfn;
- preempt_disable();
- page = __pa(per_cpu(cur_pgd, smp_processor_id()));
- preempt_enable();
-
+ page = read_cr3();
p = (unsigned long *)__va(page);
p += (address >> 30) * 2;
printk(KERN_ALERT "%08lx -> *pde = %08lx:%08lx\n", page, p[1], p[0]);
{
unsigned long page;
- preempt_disable();
- page = ((unsigned long *) per_cpu(cur_pgd, smp_processor_id()))
- [address >> 22];
- preempt_enable();
-
- page = ((unsigned long *) per_cpu(cur_pgd, get_cpu()))
- [address >> 22];
+ page = read_cr3();
+ page = ((unsigned long *) __va(page))[address >> 22];
printk(KERN_ALERT "*pde = ma %08lx pa %08lx\n", page,
machine_to_phys(page));
/*
unsigned long address;
int write, si_code;
- address = HYPERVISOR_shared_info->vcpu_info[
- smp_processor_id()].arch.cr2;
+ /* get the address */
+ address = read_cr2();
/* Set the "privileged fault" bit to something sane. */
error_code &= ~4;
* an interrupt in the middle of a task switch..
*/
int index = pgd_index(address);
+ unsigned long pgd_paddr;
pgd_t *pgd, *pgd_k;
pud_t *pud, *pud_k;
pmd_t *pmd, *pmd_k;
pte_t *pte_k;
- preempt_disable();
- pgd = index + per_cpu(cur_pgd, smp_processor_id());
- preempt_enable();
+ pgd_paddr = read_cr3();
+ pgd = index + (pgd_t *)__va(pgd_paddr);
pgd_k = init_mm.pgd + index;
if (!pgd_present(*pgd_k))
#include <linux/slab.h>
#include <linux/proc_fs.h>
#include <linux/efi.h>
+#include <linux/memory_hotplug.h>
+#include <linux/initrd.h>
#include <asm/processor.h>
#include <asm/system.h>
pkmap_page_table = pte;
}
-void __init one_highpage_init(struct page *page, int pfn, int bad_ppro)
+static void __devinit free_new_highpage(struct page *page, int pfn)
+{
+ set_page_count(page, 1);
+ if (pfn < xen_start_info->nr_pages)
+ __free_page(page);
+ totalhigh_pages++;
+}
+
+void __init add_one_highpage_init(struct page *page, int pfn, int bad_ppro)
{
if (page_is_ram(pfn) && !(bad_ppro && page_kills_ppro(pfn))) {
ClearPageReserved(page);
- set_page_count(page, 1);
- if (pfn < xen_start_info->nr_pages)
- __free_page(page);
- totalhigh_pages++;
+ free_new_highpage(page, pfn);
} else
SetPageReserved(page);
}
+static int add_one_highpage_hotplug(struct page *page, unsigned long pfn)
+{
+ free_new_highpage(page, pfn);
+ totalram_pages++;
+#ifdef CONFIG_FLATMEM
+ max_mapnr = max(pfn, max_mapnr);
+#endif
+ num_physpages++;
+ return 0;
+}
+
+/*
+ * Not currently handling the NUMA case.
+ * Assuming single node and all memory that
+ * has been added dynamically that would be
+ * onlined here is in HIGHMEM
+ */
+void online_page(struct page *page)
+{
+ ClearPageReserved(page);
+ add_one_highpage_hotplug(page, page_to_pfn(page));
+}
+
+
#ifdef CONFIG_NUMA
extern void set_highmem_pages_init(int);
#else
{
int pfn;
for (pfn = highstart_pfn; pfn < highend_pfn; pfn++)
- one_highpage_init(pfn_to_page(pfn), pfn, bad_ppro);
+ add_one_highpage_init(pfn_to_page(pfn), pfn, bad_ppro);
totalram_pages += totalhigh_pages;
}
#endif /* CONFIG_FLATMEM */
{
unsigned long vaddr;
pgd_t *pgd_base = (pgd_t *)xen_start_info->pt_base;
- int i;
swapper_pg_dir = pgd_base;
init_mm.pgd = pgd_base;
- for (i = 0; i < NR_CPUS; i++)
- per_cpu(cur_pgd, i) = pgd_base;
/* Enable PSE if available */
if (cpu_has_pse) {
set_bit(PG_pinned, &virt_to_page(init_mm.pgd)->flags);
}
+/*
+ * this is for the non-NUMA, single node SMP system case.
+ * Specifically, in the case of x86, we will always add
+ * memory to the highmem for now.
+ */
+#ifndef CONFIG_NEED_MULTIPLE_NODES
+int add_memory(u64 start, u64 size)
+{
+ struct pglist_data *pgdata = &contig_page_data;
+ struct zone *zone = pgdata->node_zones + MAX_NR_ZONES-1;
+ unsigned long start_pfn = start >> PAGE_SHIFT;
+ unsigned long nr_pages = size >> PAGE_SHIFT;
+
+ return __add_pages(zone, start_pfn, nr_pages);
+}
+
+int remove_memory(u64 start, u64 size)
+{
+ return -EINVAL;
+}
+#endif
+
kmem_cache_t *pgd_cache;
kmem_cache_t *pmd_cache;
}
EXPORT_SYMBOL(ioremap_nocache);
+/**
+ * iounmap - Free a IO remapping
+ * @addr: virtual address from ioremap_*
+ *
+ * Caller must ensure there is only one unmapping for the same pointer.
+ */
void iounmap(volatile void __iomem *addr)
{
- struct vm_struct *p;
+ struct vm_struct *p, *o;
if ((void __force *)addr <= high_memory)
return;
if ((unsigned long) addr >= fix_to_virt(FIX_ISAMAP_BEGIN))
return;
- write_lock(&vmlist_lock);
- p = __remove_vm_area((void *)(PAGE_MASK & (unsigned long __force)addr));
- if (!p) {
- printk(KERN_WARNING "iounmap: bad address %p\n", addr);
+ addr = (volatile void __iomem *)(PAGE_MASK & (unsigned long __force)addr);
+
+ /* Use the vm area unlocked, assuming the caller
+ ensures there isn't another iounmap for the same address
+ in parallel. Reuse of the virtual address is prevented by
+ leaving it in the global lists until we're done with it.
+ cpa takes care of the direct mappings. */
+ read_lock(&vmlist_lock);
+ for (p = vmlist; p; p = p->next) {
+ if (p->addr == addr)
+ break;
+ }
+ read_unlock(&vmlist_lock);
+
+ if (!p) {
+ printk("iounmap: bad address %p\n", addr);
dump_stack();
- goto out_unlock;
+ return;
}
+ /* Reset the direct mapping. Can block */
if ((p->flags >> 20) && is_local_lowmem(p->phys_addr)) {
/* p->size includes the guard page, but cpa doesn't like that */
change_page_attr(virt_to_page(bus_to_virt(p->phys_addr)),
PAGE_KERNEL);
global_flush_tlb();
}
-out_unlock:
- write_unlock(&vmlist_lock);
+
+ /* Finally remove it */
+ o = remove_vm_area((void *)addr);
+ BUG_ON(p != o || o == NULL);
kfree(p);
}
EXPORT_SYMBOL(iounmap);
pg_data_t *pgdat;
unsigned long i;
struct page_state ps;
+ unsigned long flags;
printk(KERN_INFO "Mem-info:\n");
show_free_areas();
printk(KERN_INFO "Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
for_each_pgdat(pgdat) {
+ pgdat_resize_lock(pgdat, &flags);
for (i = 0; i < pgdat->node_spanned_pages; ++i) {
page = pgdat_page_nr(pgdat, i);
total++;
else if (page_count(page))
shared += page_count(page) - 1;
}
+ pgdat_resize_unlock(pgdat, &flags);
}
printk(KERN_INFO "%d pages of RAM\n", total);
printk(KERN_INFO "%d pages of HIGHMEM\n", highmem);
struct page *page = virt_to_page(pgd);
page->index = (unsigned long)pgd_list;
if (pgd_list)
- pgd_list->private = (unsigned long)&page->index;
+ set_page_private(pgd_list, (unsigned long)&page->index);
pgd_list = page;
- page->private = (unsigned long)&pgd_list;
+ set_page_private(page, (unsigned long)&pgd_list);
}
static inline void pgd_list_del(pgd_t *pgd)
{
struct page *next, **pprev, *page = virt_to_page(pgd);
next = (struct page *)page->index;
- pprev = (struct page **)page->private;
+ pprev = (struct page **)page_private(page);
*pprev = next;
if (next)
- next->private = (unsigned long)pprev;
+ set_page_private(next, (unsigned long)pprev);
}
void pgd_ctor(void *pgd, kmem_cache_t *cache, unsigned long unused)
obj-y := i386.o
obj-$(CONFIG_PCI_BIOS) += pcbios.o
-obj-$(CONFIG_PCI_MMCONFIG) += mmconfig.o
+obj-$(CONFIG_PCI_MMCONFIG) += mmconfig.o direct.o
obj-$(CONFIG_PCI_DIRECT) += direct.o
pci-y := fixup.o
return 0;
}
-static __init int via_router_probe(struct irq_router *r, struct pci_dev *router, u16 device)
+static __init int via_router_probe(struct irq_router *r,
+ struct pci_dev *router, u16 device)
{
/* FIXME: We should move some of the quirk fixup stuff here */
- if (router->device == PCI_DEVICE_ID_VIA_82C686 &&
- device == PCI_DEVICE_ID_VIA_82C586_0) {
- /* Asus k7m bios wrongly reports 82C686A as 586-compatible */
- device = PCI_DEVICE_ID_VIA_82C686;
+ /*
+ * work arounds for some buggy BIOSes
+ */
+ if (device == PCI_DEVICE_ID_VIA_82C586_0) {
+ switch(router->device) {
+ case PCI_DEVICE_ID_VIA_82C686:
+ /*
+ * Asus k7m bios wrongly reports 82C686A
+ * as 586-compatible
+ */
+ device = PCI_DEVICE_ID_VIA_82C686;
+ break;
+ case PCI_DEVICE_ID_VIA_8235:
+ /**
+ * Asus a7v-x bios wrongly reports 8235
+ * as 586-compatible
+ */
+ device = PCI_DEVICE_ID_VIA_8235;
+ break;
+ }
}
- switch(device)
- {
- case PCI_DEVICE_ID_VIA_82C586_0:
- r->name = "VIA";
- r->get = pirq_via586_get;
- r->set = pirq_via586_set;
- return 1;
- case PCI_DEVICE_ID_VIA_82C596:
- case PCI_DEVICE_ID_VIA_82C686:
- case PCI_DEVICE_ID_VIA_8231:
+ switch(device) {
+ case PCI_DEVICE_ID_VIA_82C586_0:
+ r->name = "VIA";
+ r->get = pirq_via586_get;
+ r->set = pirq_via586_set;
+ return 1;
+ case PCI_DEVICE_ID_VIA_82C596:
+ case PCI_DEVICE_ID_VIA_82C686:
+ case PCI_DEVICE_ID_VIA_8231:
+ case PCI_DEVICE_ID_VIA_8235:
/* FIXME: add new ones for 8233/5 */
- r->name = "VIA";
- r->get = pirq_via_get;
- r->set = pirq_via_set;
- return 1;
+ r->name = "VIA";
+ r->get = pirq_via_get;
+ r->set = pirq_via_set;
+ return 1;
}
return 0;
}
--- /dev/null
+obj-$(CONFIG_PM_LEGACY) += cpu.o
+obj-$(CONFIG_SOFTWARE_SUSPEND) += cpu.o
+obj-$(CONFIG_ACPI_SLEEP) += cpu.o
+obj-$(CONFIG_SOFTWARE_SUSPEND) += swsusp.o
/* Changed during early boot */
unsigned long high_physmem;
-extern unsigned long physmem_size;
+extern unsigned long long physmem_size;
int init_maps(unsigned long physmem, unsigned long iomem, unsigned long highmem)
{
extern int __syscall_stub_start, __binary_start;
void setup_physmem(unsigned long start, unsigned long reserve_end,
- unsigned long len, unsigned long highmem)
+ unsigned long len, unsigned long long highmem)
{
unsigned long reserve = reserve_end - start;
int pfn = PFN_UP(__pa(reserve_end));
source "kernel/Kconfig.preempt"
-config K8_NUMA
- bool "K8 NUMA support"
- select NUMA
+config NUMA
+ bool "Non Uniform Memory Access (NUMA) Support"
depends on SMP && !X86_64_XEN
help
- Enable NUMA (Non Unified Memory Architecture) support for
- AMD Opteron Multiprocessor systems. The kernel will try to allocate
- memory used by a CPU on the local memory controller of the CPU
- and add some more NUMA awareness to the kernel.
- This code is recommended on all multiprocessor Opteron systems
- and normally doesn't hurt on others.
+ Enable NUMA (Non Uniform Memory Access) support. The kernel
+ will try to allocate memory used by a CPU on the local memory
+ controller of the CPU and add some more NUMA awareness to the kernel.
+ This code is recommended on all multiprocessor Opteron systems.
+ If the system is EM64T, you should say N unless your system is EM64T
+ NUMA.
+
+config K8_NUMA
+ bool "Old style AMD Opteron NUMA detection"
+ depends on NUMA
+ default y
+ help
+ Enable K8 NUMA node topology detection. You should say Y here if
+ you have a multi processor AMD K8 system. This uses an old
+ method to read the NUMA configurtion directly from the builtin
+ Northbridge of Opteron. It is recommended to use X86_64_ACPI_NUMA
+ instead, which also takes priority if both are compiled in.
+
+# Dummy CONFIG option to select ACPI_NUMA from drivers/acpi/Kconfig.
+
+config X86_64_ACPI_NUMA
+ bool "ACPI NUMA detection"
+ depends on NUMA
+ select ACPI
+ select ACPI_NUMA
+ default y
+ help
+ Enable ACPI SRAT based node topology detection.
config NUMA_EMU
- bool "NUMA emulation support"
- select NUMA
- depends on SMP && !X86_64_XEN
+ bool "NUMA emulation"
+ depends on NUMA
help
Enable NUMA emulation. A flat machine will be split
into virtual nodes when booted with "numa=fake=N", where N is the
depends on NUMA
default y
-config NUMA
- bool
- default n
config ARCH_DISCONTIGMEM_ENABLE
def_bool y
Additional support for intel specific MCE features such as
the thermal monitor.
+config X86_MCE_AMD
+ bool "AMD MCE features"
+ depends on X86_MCE && X86_LOCAL_APIC
+ default y
+ help
+ Additional support for AMD specific MCE features such as
+ the DRAM Error Threshold.
+
config PHYSICAL_START
hex "Physical address where the kernel is loaded" if EMBEDDED
default "0x100000"
left.
config IA32_AOUT
- bool "IA32 a.out support"
+ tristate "IA32 a.out support"
depends on IA32_EMULATION
help
Support old a.out binaries in the 32bit emulation.
source fs/Kconfig
+menu "Instrumentation Support"
+ depends on EXPERIMENTAL
+
source "arch/x86_64/oprofile/Kconfig"
+config KPROBES
+ bool "Kprobes (EXPERIMENTAL)"
+ help
+ Kprobes allows you to trap at almost any kernel address and
+ execute a callback function. register_kprobe() establishes
+ a probepoint and specifies the callback. Kprobes is useful
+ for kernel debugging, non-intrusive instrumentation and testing.
+ If in doubt, say "N".
+endmenu
+
source "arch/x86_64/Kconfig.debug"
source "security/Kconfig"
obj-$(CONFIG_X86_MCE) += mce.o
obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o
+obj-$(CONFIG_X86_MCE_AMD) += mce_amd.o
obj-$(CONFIG_MTRR) += ../../i386/kernel/cpu/mtrr/
obj-$(CONFIG_ACPI) += acpi/
obj-$(CONFIG_X86_MSR) += msr.o
obj-$(CONFIG_X86_IO_APIC) += io_apic.o mpparse.o \
genapic.o genapic_cluster.o genapic_flat.o
obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o
-obj-$(CONFIG_PM) += suspend.o
+obj-$(CONFIG_SOFTWARE_SUSPEND) += suspend.o
+obj-$(CONFIG_ACPI_SLEEP) += suspend.o
obj-$(CONFIG_SOFTWARE_SUSPEND) += suspend_asm.o
obj-$(CONFIG_CPU_FREQ) += cpufreq/
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
obj-$(CONFIG_GART_IOMMU) += pci-gart.o aperture.o
obj-$(CONFIG_DUMMY_IOMMU) += pci-nommu.o pci-dma.o
-obj-$(CONFIG_SWIOTLB) += swiotlb.o
obj-$(CONFIG_KPROBES) += kprobes.o
obj-$(CONFIG_X86_PM_TIMER) += pmtimer.o
bootflag-y += ../../i386/kernel/bootflag.o
cpuid-$(subst m,y,$(CONFIG_X86_CPUID)) += ../../i386/kernel/cpuid.o
topology-y += ../../i386/mach-default/topology.o
-swiotlb-$(CONFIG_SWIOTLB) += ../../ia64/lib/swiotlb.o
microcode-$(subst m,y,$(CONFIG_MICROCODE)) += ../../i386/kernel/microcode.o
intel_cacheinfo-y += ../../i386/kernel/cpu/intel_cacheinfo.o
quirks-y += ../../i386/kernel/quirks.o
ifdef CONFIG_XEN
time-y += ../../i386/kernel/time-xen.o
+obj-$(CONFIG_SWIOTLB) += swiotlb.o
swiotlb-$(CONFIG_SWIOTLB) := ../../i386/kernel/swiotlb.o
pci-dma-y += ../../i386/kernel/pci-dma-xen.o
microcode-$(subst m,y,$(CONFIG_MICROCODE)) := ../../i386/kernel/microcode-xen.o
#include <asm/arch_hooks.h>
#include <asm/hpet.h>
-#include "io_ports.h"
-
/*
* Debug level
*/
#include <asm/e820.h>
#include <asm/proto.h>
#include <asm/bootsetup.h>
+#include <asm/sections.h>
#include <xen/interface/memory.h>
unsigned long pci_mem_start = 0xaeedbabe;
}
#ifndef CONFIG_XEN
-extern char _end[];
/*
* end_pfn only includes RAM, while end_pfn_map includes all e820 entries.
CFI_ENDPROC
.endm
-#if 0
+#ifndef CONFIG_XEN
+ENTRY(thermal_interrupt)
+ apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt
+
+ENTRY(threshold_interrupt)
+ apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt
+
+#ifdef CONFIG_SMP
ENTRY(reschedule_interrupt)
apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt
ENTRY(call_function_interrupt)
apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
#endif
+#endif /* !CONFIG_XEN */
#ifdef CONFIG_X86_LOCAL_APIC
ENTRY(apic_timer_interrupt)
#include <linux/threads.h>
+#include <linux/init.h>
#include <asm/desc.h>
#include <asm/segment.h>
#include <asm/page.h>
.quad 0x0000000000003007 + __PHYSICAL_START /* -> level3_kernel_pgt */
#endif
+#ifndef CONFIG_XEN
+#ifndef CONFIG_HOTPLUG_CPU
+ __INITDATA
+#endif
+ /*
+ * This default setting generates an ident mapping at address 0x100000
+ * and a mapping for the kernel that precisely maps virtual address
+ * 0xffffffff80000000 to physical address 0x000000. (always using
+ * 2Mbyte large pages provided by PAE mode)
+ */
+ .align PAGE_SIZE
+ENTRY(boot_level4_pgt)
+ .quad 0x0000000000002007 + __PHYSICAL_START /* -> level3_ident_pgt */
+ .fill 255,8,0
+ .quad 0x000000000000a007 + __PHYSICAL_START
+ .fill 254,8,0
+ /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
+ .quad 0x0000000000003007 + __PHYSICAL_START /* -> level3_kernel_pgt */
+#endif
+
.data
.align 16
#include <asm/bootsetup.h>
#include <asm/setup.h>
#include <asm/desc.h>
+#include <asm/pgtable.h>
+#include <asm/sections.h>
unsigned long start_pfn;
#if 0
static void __init clear_bss(void)
{
- extern char __bss_start[], __bss_end[];
memset(__bss_start, 0,
- (unsigned long) __bss_end - (unsigned long) __bss_start);
+ (unsigned long) __bss_stop - (unsigned long) __bss_start);
}
#endif
boot_cpu_data.x86_mask = eax & 0xf;
}
-extern char _end[];
-
void __init x86_64_start_kernel(char * real_mode_data)
{
int i;
* Rough estimation of how many shared IRQs there are, can
* be changed anytime.
*/
-#define MAX_PLUS_SHARED_IRQS NR_IRQS
+#define MAX_PLUS_SHARED_IRQS NR_IRQ_VECTORS
#define PIN_MAP_SIZE (MAX_PLUS_SHARED_IRQS + NR_IRQS)
/*
static int first_free_entry = NR_IRQS;
struct irq_pin_list *entry = irq_2_pin + irq;
+ BUG_ON(irq >= NR_IRQS);
while (entry->next)
entry = irq_2_pin + entry->next;
entry->next = first_free_entry;
entry = irq_2_pin + entry->next;
if (++first_free_entry >= PIN_MAP_SIZE)
- panic("io_apic.c: whoops");
+ panic("io_apic.c: ran out of irq_2_pin entries!");
}
entry->apic = apic;
entry->pin = pin;
int pin; \
struct irq_pin_list *entry = irq_2_pin + irq; \
\
+ BUG_ON(irq >= NR_IRQS); \
for (;;) { \
unsigned int reg; \
pin = entry->pin; \
#endif /* !CONFIG_XEN */
+static u8 gsi_2_irq[NR_IRQ_VECTORS] = { [0 ... NR_IRQ_VECTORS-1] = 0xFF };
+
/*
* support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
* specific CPU-side IRQs.
best_guess = irq;
}
}
+ BUG_ON(best_guess >= NR_IRQS);
return best_guess;
}
return MPBIOS_trigger(idx);
}
+static int next_irq = 16;
+
+/*
+ * gsi_irq_sharing -- Name overload! "irq" can be either a legacy IRQ
+ * in the range 0-15, a linux IRQ in the range 0-223, or a GSI number
+ * from ACPI, which can reach 800 in large boxen.
+ *
+ * Compact the sparse GSI space into a sequential IRQ series and reuse
+ * vectors if possible.
+ */
+int gsi_irq_sharing(int gsi)
+{
+ int i, tries, vector;
+
+ BUG_ON(gsi >= NR_IRQ_VECTORS);
+
+ if (platform_legacy_irq(gsi))
+ return gsi;
+
+ if (gsi_2_irq[gsi] != 0xFF)
+ return (int)gsi_2_irq[gsi];
+
+ tries = NR_IRQS;
+ try_again:
+ vector = assign_irq_vector(gsi);
+
+ /*
+ * Sharing vectors means sharing IRQs, so scan irq_vectors for previous
+ * use of vector and if found, return that IRQ. However, we never want
+ * to share legacy IRQs, which usually have a different trigger mode
+ * than PCI.
+ */
+ for (i = 0; i < NR_IRQS; i++)
+ if (IO_APIC_VECTOR(i) == vector)
+ break;
+ if (platform_legacy_irq(i)) {
+ if (--tries >= 0) {
+ IO_APIC_VECTOR(i) = 0;
+ goto try_again;
+ }
+ panic("gsi_irq_sharing: didn't find an IRQ using vector 0x%02X for GSI %d", vector, gsi);
+ }
+ if (i < NR_IRQS) {
+ gsi_2_irq[gsi] = i;
+ printk(KERN_INFO "GSI %d sharing vector 0x%02X and IRQ %d\n",
+ gsi, vector, i);
+ return i;
+ }
+
+ i = next_irq++;
+ BUG_ON(i >= NR_IRQS);
+ gsi_2_irq[gsi] = i;
+ IO_APIC_VECTOR(i) = vector;
+ printk(KERN_INFO "GSI %d assigned vector 0x%02X and IRQ %d\n",
+ gsi, vector, i);
+ return i;
+}
+
static int pin_2_irq(int idx, int apic, int pin)
{
int irq, i;
while (i < apic)
irq += nr_ioapic_registers[i++];
irq += pin;
+ irq = gsi_irq_sharing(irq);
break;
}
default:
break;
}
}
+ BUG_ON(irq >= NR_IRQS);
/*
* PCI IRQ command line redirection. Yes, limits are hardcoded.
}
}
}
+ BUG_ON(irq >= NR_IRQS);
return irq;
}
static int current_vector = FIRST_DEVICE_VECTOR;
physdev_op_t op;
- BUG_ON(irq >= NR_IRQ_VECTORS);
+ BUG_ON(irq != AUTO_ASSIGN && (unsigned)irq >= NR_IRQ_VECTORS);
if (irq != AUTO_ASSIGN && IO_APIC_VECTOR(irq) > 0)
return IO_APIC_VECTOR(irq);
entry.polarity = active_high_low;
entry.mask = 1; /* Disabled (masked) */
+ irq = gsi_irq_sharing(irq);
/*
* IRQs < 16 are already in the irq_2_pin[] map
*/
* Various Linux-internal data structures created from the
* MP-table.
*/
-int apic_version [MAX_APICS];
+unsigned char apic_version [MAX_APICS];
unsigned char mp_bus_id_to_type [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
int mp_bus_id_to_pci_bus [MAX_MP_BUSSES] = { [0 ... MAX_MP_BUSSES-1] = -1 };
/* Processor that is doing the boot up */
unsigned int boot_cpu_id = -1U;
/* Internal processor count */
-static unsigned int num_processors = 0;
+unsigned int num_processors __initdata = 0;
+
+unsigned disabled_cpus __initdata;
/* Bitmask of physically existing CPUs */
physid_mask_t phys_cpu_present_map = PHYSID_MASK_NONE;
#ifndef CONFIG_XEN
static void __init MP_processor_info (struct mpc_config_processor *m)
{
- int ver, cpu;
+ int cpu;
+ unsigned char ver;
static int found_bsp=0;
- if (!(m->mpc_cpuflag & CPU_ENABLED))
+ if (!(m->mpc_cpuflag & CPU_ENABLED)) {
+ disabled_cpus++;
return;
+ }
printk(KERN_INFO "Processor #%d %d:%d APIC version %d\n",
m->mpc_apicid,
}
cpu = num_processors++;
-
- if (m->mpc_apicid > MAX_APICS) {
+
+#if MAX_APICS < 255
+ if ((int)m->mpc_apicid > MAX_APICS) {
printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n",
m->mpc_apicid, MAX_APICS);
return;
}
+#endif
ver = m->mpc_apicver;
physid_set(m->mpc_apicid, phys_cpu_present_map);
m->mpc_irqtype, m->mpc_irqflag & 3,
(m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,
m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq);
- if (++mp_irq_entries == MAX_IRQ_SOURCES)
+ if (++mp_irq_entries >= MAX_IRQ_SOURCES)
panic("Max # of irq sources exceeded!!\n");
}
*/
void *dma_alloc_coherent(struct device *hwdev, size_t size,
- dma_addr_t *dma_handle, unsigned gfp)
+ dma_addr_t *dma_handle, gfp_t gfp)
{
void *ret;
u64 mask;
if (need_resched()) {
local_irq_enable();
} else {
+ clear_thread_flag(TIF_POLLING_NRFLAG);
+ smp_mb__after_clear_bit();
stop_hz_timer();
/* Blocking includes an implicit local_irq_enable(). */
HYPERVISOR_sched_op(SCHEDOP_block, 0);
start_hz_timer();
+ set_thread_flag(TIF_POLLING_NRFLAG);
}
}
*/
void cpu_idle (void)
{
+ set_thread_flag(TIF_POLLING_NRFLAG);
+
/* endless idle loop with no priority at all */
while (1) {
while (!need_resched()) {
xen_idle();
}
+ preempt_enable_no_resched();
schedule();
+ preempt_disable();
}
}
do {
ssleep(1);
for_each_online_cpu(cpu) {
- if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu))
+ if (cpu_isset(cpu, map) &&
+ !per_cpu(cpu_idle_state, cpu))
cpu_clear(cpu, map);
}
cpus_and(map, map, cpu_online_map);
system_utsname.version);
printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip);
printk_address(regs->rip);
- printk("\nRSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp, regs->eflags);
+ printk("\nRSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp,
+ regs->eflags);
printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
regs->rax, regs->rbx, regs->rcx);
printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
struct task_struct *tsk = current;
struct thread_info *t = current_thread_info();
- /*
- * Remove function-return probe instances associated with this task
- * and put them back on the free list. Do not insert an exit probe for
- * this function, it will be disabled by kprobe_flush_task if you do.
- */
- kprobe_flush_task(tsk);
-
if (t->flags & _TIF_ABI_PENDING)
t->flags ^= (_TIF_ABI_PENDING | _TIF_IA32);
struct pt_regs * childregs;
struct task_struct *me = current;
- childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p->thread_info)) - 1;
-
+ childregs = ((struct pt_regs *)
+ (THREAD_SIZE + (unsigned long) p->thread_info)) - 1;
*childregs = *regs;
childregs->rax = 0;
childregs->rsp = rsp;
- if (rsp == ~0UL) {
+ if (rsp == ~0UL)
childregs->rsp = (unsigned long)childregs;
- }
p->thread.rsp = (unsigned long) childregs;
p->thread.rsp0 = (unsigned long) (childregs+1);
p->thread.io_bitmap_max = 0;
return -ENOMEM;
}
- memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr, IO_BITMAP_BYTES);
+ memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
+ IO_BITMAP_BYTES);
}
/*
* - fold all the options into a flag word and test it with a single test.
* - could test fs/gs bitsliced
*/
-struct task_struct *__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
+struct task_struct *
+__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
struct thread_struct *prev = &prev_p->thread,
*next = &next_p->thread;
prev->userrsp = read_pda(oldrsp);
write_pda(oldrsp, next->userrsp);
write_pda(pcurrent, next_p);
- write_pda(kernelstack, (unsigned long)next_p->thread_info + THREAD_SIZE - PDA_STACKOFFSET);
+ write_pda(kernelstack,
+ (unsigned long)next_p->thread_info + THREAD_SIZE - PDA_STACKOFFSET);
/*
* Now maybe reload the debug registers
return do_fork(SIGCHLD, regs->rsp, regs, 0, NULL, NULL);
}
-asmlinkage long sys_clone(unsigned long clone_flags, unsigned long newsp, void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
+asmlinkage long
+sys_clone(unsigned long clone_flags, unsigned long newsp,
+ void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
{
if (!newsp)
newsp = regs->rsp;
return 0;
fp = *(u64 *)(p->thread.rsp);
do {
- if (fp < (unsigned long)stack || fp > (unsigned long)stack+THREAD_SIZE)
+ if (fp < (unsigned long)stack ||
+ fp > (unsigned long)stack+THREAD_SIZE)
return 0;
rip = *(u64 *)(fp+8);
if (!in_sched_functions(rip))
task->thread.gsindex = 0;
task->thread.gs = addr;
if (doit) {
- load_gs_index(0);
- ret = HYPERVISOR_set_segment_base(SEGBASE_GS_USER, addr);
+ load_gs_index(0);
+ ret = HYPERVISOR_set_segment_base(
+ SEGBASE_GS_USER, addr);
}
}
put_cpu();
set_32bit_tls(task, FS_TLS, addr);
if (doit) {
load_TLS(&task->thread, cpu);
- asm volatile("mov %0,%%fs" :: "r" (FS_TLS_SEL));
+ asm volatile("movl %0,%%fs" :: "r"(FS_TLS_SEL));
}
task->thread.fsindex = FS_TLS_SEL;
task->thread.fs = 0;
if (doit) {
/* set the selector to 0 to not confuse
__switch_to */
- asm volatile("mov %0,%%fs" :: "r" (0));
- ret = HYPERVISOR_set_segment_base(SEGBASE_FS, addr);
-
+ asm volatile("movl %0,%%fs" :: "r" (0));
+ ret = HYPERVISOR_set_segment_base(SEGBASE_FS,
+ addr);
}
}
put_cpu();
unsigned long base;
if (task->thread.fsindex == FS_TLS_SEL)
base = read_32bit_tls(task, FS_TLS);
- else if (doit) {
+ else if (doit)
rdmsrl(MSR_FS_BASE, base);
- } else
+ else
base = task->thread.fs;
ret = put_user(base, (unsigned long __user *)addr);
break;
unsigned long base;
if (task->thread.gsindex == GS_TLS_SEL)
base = read_32bit_tls(task, GS_TLS);
- else if (doit) {
+ else if (doit)
rdmsrl(MSR_KERNEL_GS_BASE, base);
- } else
+ else
base = task->thread.gs;
ret = put_user(base, (unsigned long __user *)addr);
break;
#include <asm/setup.h>
#include <asm/mach_apic.h>
#include <asm/numa.h>
+#include <asm/sections.h>
#ifdef CONFIG_XEN
#include <linux/percpu.h>
#include <xen/interface/physdev.h>
struct e820map e820;
extern int root_mountflags;
-extern char _text, _etext, _edata, _end;
char command_line[COMMAND_LINE_SIZE];
{
unsigned long bootmap_size, bootmap;
- memory_present(0, start_pfn, end_pfn);
bootmap_size = bootmem_bootmap_pages(end_pfn)<<PAGE_SHIFT;
bootmap = find_e820_area(0, end_pfn<<PAGE_SHIFT, bootmap_size);
if (bootmap == -1L)
}
#endif
- sparse_init();
-
paging_init();
#ifdef CONFIG_X86_LOCAL_APIC
/*
check_ioapic();
#endif
+ zap_low_mappings(0);
+
#ifdef CONFIG_ACPI
/*
* Initialize the ACPI boot-time table parser (gets the RSDP and SDT).
#endif
bits = 0;
- while ((1 << bits) < c->x86_num_cores)
+ while ((1 << bits) < c->x86_max_cores)
bits++;
/* Low order bits define the core id (index of core in socket) */
if (!node_online(node))
node = nearby_node(apicid);
}
- cpu_to_node[cpu] = node;
+ numa_set_node(cpu, node);
printk(KERN_INFO "CPU %d(%d) -> Node %d -> Core %d\n",
- cpu, c->x86_num_cores, node, cpu_core_id[cpu]);
+ cpu, c->x86_max_cores, node, cpu_core_id[cpu]);
#endif
#endif
}
display_cacheinfo(c);
if (c->extended_cpuid_level >= 0x80000008) {
- c->x86_num_cores = (cpuid_ecx(0x80000008) & 0xff) + 1;
- if (c->x86_num_cores & (c->x86_num_cores - 1))
- c->x86_num_cores = 1;
+ c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1;
+ if (c->x86_max_cores & (c->x86_max_cores - 1))
+ c->x86_max_cores = 1;
amd_detect_cmp(c);
}
{
#ifdef CONFIG_SMP
u32 eax, ebx, ecx, edx;
- int index_msb, tmp;
+ int index_msb, core_bits;
int cpu = smp_processor_id();
-
+
+ cpuid(1, &eax, &ebx, &ecx, &edx);
+
+ c->apicid = phys_pkg_id(0);
+
if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY))
return;
- cpuid(1, &eax, &ebx, &ecx, &edx);
smp_num_siblings = (ebx & 0xff0000) >> 16;
-
+
if (smp_num_siblings == 1) {
printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
- } else if (smp_num_siblings > 1) {
- index_msb = 31;
- /*
- * At this point we only support two siblings per
- * processor package.
- */
+ } else if (smp_num_siblings > 1 ) {
+
if (smp_num_siblings > NR_CPUS) {
printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", smp_num_siblings);
smp_num_siblings = 1;
return;
}
- tmp = smp_num_siblings;
- while ((tmp & 0x80000000 ) == 0) {
- tmp <<=1 ;
- index_msb--;
- }
- if (smp_num_siblings & (smp_num_siblings - 1))
- index_msb++;
+
+ index_msb = get_count_order(smp_num_siblings);
phys_proc_id[cpu] = phys_pkg_id(index_msb);
-
+
printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
phys_proc_id[cpu]);
- smp_num_siblings = smp_num_siblings / c->x86_num_cores;
+ smp_num_siblings = smp_num_siblings / c->x86_max_cores;
- tmp = smp_num_siblings;
- index_msb = 31;
- while ((tmp & 0x80000000) == 0) {
- tmp <<=1 ;
- index_msb--;
- }
- if (smp_num_siblings & (smp_num_siblings - 1))
- index_msb++;
+ index_msb = get_count_order(smp_num_siblings) ;
- cpu_core_id[cpu] = phys_pkg_id(index_msb);
+ core_bits = get_count_order(c->x86_max_cores);
- if (c->x86_num_cores > 1)
+ cpu_core_id[cpu] = phys_pkg_id(index_msb) &
+ ((1 << core_bits) - 1);
+
+ if (c->x86_max_cores > 1)
printk(KERN_INFO "CPU: Processor Core ID: %d\n",
cpu_core_id[cpu]);
}
node = apicid_to_node[hard_smp_processor_id()];
if (node == NUMA_NO_NODE)
node = 0;
- cpu_to_node[cpu] = node;
+ numa_set_node(cpu, node);
if (acpi_numa > 0)
printk(KERN_INFO "CPU %d -> Node %d\n", cpu, node);
unsigned eax = cpuid_eax(0x80000008);
c->x86_virt_bits = (eax >> 8) & 0xff;
c->x86_phys_bits = eax & 0xff;
+ /* CPUID workaround for Intel 0F34 CPU */
+ if (c->x86_vendor == X86_VENDOR_INTEL &&
+ c->x86 == 0xF && c->x86_model == 0x3 &&
+ c->x86_mask == 0x4)
+ c->x86_phys_bits = 36;
}
if (c->x86 == 15)
c->x86_cache_alignment = c->x86_clflush_size * 2;
if (c->x86 >= 15)
set_bit(X86_FEATURE_CONSTANT_TSC, &c->x86_capability);
- c->x86_num_cores = intel_num_cpu_cores(c);
+ c->x86_max_cores = intel_num_cpu_cores(c);
srat_detect_node();
}
c->x86_model_id[0] = '\0'; /* Unset */
c->x86_clflush_size = 64;
c->x86_cache_alignment = c->x86_clflush_size;
- c->x86_num_cores = 1;
+ c->x86_max_cores = 1;
c->extended_cpuid_level = 0;
memset(&c->x86_capability, 0, sizeof c->x86_capability);
c->x86 = (tfms >> 8) & 0xf;
c->x86_model = (tfms >> 4) & 0xf;
c->x86_mask = tfms & 0xf;
- if (c->x86 == 0xf) {
+ if (c->x86 == 0xf)
c->x86 += (tfms >> 20) & 0xff;
+ if (c->x86 >= 0x6)
c->x86_model += ((tfms >> 16) & 0xF) << 4;
- }
if (c->x86_capability[0] & (1<<19))
c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
} else {
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
/* Intel-defined (#2) */
- "pni", NULL, NULL, "monitor", "ds_cpl", NULL, NULL, "est",
+ "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", NULL, "est",
"tm2", NULL, "cid", NULL, NULL, "cx16", "xtpr", NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
seq_printf(m, "cache size\t: %d KB\n", c->x86_cache_size);
#ifdef CONFIG_SMP
- if (smp_num_siblings * c->x86_num_cores > 1) {
+ if (smp_num_siblings * c->x86_max_cores > 1) {
int cpu = c - cpu_data;
seq_printf(m, "physical id\t: %d\n", phys_proc_id[cpu]);
- seq_printf(m, "siblings\t: %d\n",
- c->x86_num_cores * smp_num_siblings);
+ seq_printf(m, "siblings\t: %d\n", cpus_weight(cpu_core_map[cpu]));
seq_printf(m, "core id\t\t: %d\n", cpu_core_id[cpu]);
- seq_printf(m, "cpu cores\t: %d\n", c->x86_num_cores);
+ seq_printf(m, "cpu cores\t: %d\n", c->booted_cores);
}
#endif
/* CPU 0 is initialised in head64.c */
if (cpu != 0) {
pda_init(cpu);
+ zap_low_mappings(cpu);
} else
estacks = boot_exception_stacks;
static DEFINE_PER_CPU(union smp_flush_state, flush_state);
#endif
-#define __cpuinit __init
-
/*
* We cannot call mmdrop() because we are in interrupt context,
* instead update mm->cpu_vm_mask.
void smp_stop_cpu(void)
{
+ unsigned long flags;
/*
* Remove this CPU:
*/
cpu_clear(smp_processor_id(), cpu_online_map);
- local_irq_disable();
+ local_irq_save(flags);
#ifndef CONFIG_XEN
disable_local_APIC();
#endif
- local_irq_enable();
+ local_irq_restore(flags);
}
static void smp_really_stop_cpu(void *dummy)
{
conditional_sti(regs);
-#ifdef CONFIG_CHECKING
- {
- unsigned long gs;
- struct x8664_pda *pda = cpu_pda + safe_smp_processor_id();
- rdmsrl(MSR_GS_BASE, gs);
- if (gs != (unsigned long)pda) {
- wrmsrl(MSR_GS_BASE, pda);
- printk("%s: wrong gs %lx expected %p rip %lx\n", str, gs, pda,
- regs->rip);
- }
- }
-#endif
-
if (user_mode(regs)) {
struct task_struct *tsk = current;
{
conditional_sti(regs);
-#ifdef CONFIG_CHECKING
- {
- unsigned long gs;
- struct x8664_pda *pda = cpu_pda + safe_smp_processor_id();
- rdmsrl(MSR_GS_BASE, gs);
- if (gs != (unsigned long)pda) {
- wrmsrl(MSR_GS_BASE, pda);
- oops_in_progress++;
- printk("general protection handler: wrong gs %lx expected %p\n", gs, pda);
- oops_in_progress--;
- }
- }
-#endif
-
if (user_mode(regs)) {
struct task_struct *tsk = current;
struct task_struct *tsk = current;
siginfo_t info;
-#ifdef CONFIG_CHECKING
- {
- /* RED-PEN interaction with debugger - could destroy gs */
- unsigned long gs;
- struct x8664_pda *pda = cpu_pda + safe_smp_processor_id();
- rdmsrl(MSR_GS_BASE, gs);
- if (gs != (unsigned long)pda) {
- wrmsrl(MSR_GS_BASE, pda);
- printk("debug handler: wrong gs %lx expected %p\n", gs, pda);
- }
- }
-#endif
-
get_debugreg(condition, 6);
if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
}
#endif
+asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void)
+{
+}
+
/*
* 'math_state_restore()' saves the current math information in the
* old math state array, and gets the new ones from the current task
#endif
EXPORT_SYMBOL(cpu_khz);
+
+EXPORT_SYMBOL(load_gs_index);
+
#include <linux/compiler.h>
#include <linux/module.h>
#include <linux/kprobes.h>
-#include <linux/percpu.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm-generic/sections.h>
#include <asm/kdebug.h>
-DEFINE_PER_CPU(pgd_t *, cur_pgd);
-
void bust_spinlocks(int yes)
{
int loglevel_save = console_loglevel;
pmd_t *pmd;
pte_t *pte;
- preempt_disable();
- pgd = (pgd_t *)per_cpu(cur_pgd, smp_processor_id());
- preempt_enable();
+ asm("movq %%cr3,%0" : "=r" (pgd));
+ pgd = (pgd_t *)machine_to_phys((maddr_t)pgd);
+
+ pgd = __va((unsigned long)pgd & PHYSICAL_PAGE_MASK);
pgd += pgd_index(address);
printk("PGD %lx ", pgd_val(*pgd));
if (bad_address(pgd)) goto bad;
/* On Xen the line below does not always work. Needs investigating! */
/*pgd = pgd_offset(current->mm ?: &init_mm, address);*/
- preempt_disable();
- pgd = (pgd_t *)per_cpu(cur_pgd, smp_processor_id());
- preempt_enable();
+ asm("movq %%cr3,%0" : "=r" (pgd));
+ pgd = (pgd_t *)machine_to_phys((maddr_t)pgd);
+ pgd = __va((unsigned long)pgd & PHYSICAL_PAGE_MASK);
pgd += pgd_index(address);
pgd_ref = pgd_offset_k(address);
if (pgd_none(*pgd_ref))
if (!user_mode(regs))
error_code &= ~4; /* means kernel */
-#ifdef CONFIG_CHECKING
- {
- unsigned long gs;
- struct x8664_pda *pda = cpu_pda + stack_smp_processor_id();
- rdmsrl(MSR_GS_BASE, gs);
- if (gs != (unsigned long)pda) {
- wrmsrl(MSR_GS_BASE, pda);
- printk("page_fault: wrong gs %lx expected %p\n", gs, pda);
- }
- }
-#endif
-
/* get the address */
address = HYPERVISOR_shared_info->vcpu_info[
smp_processor_id()].arch.cr2;
-
if (notify_die(DIE_PAGE_FAULT, "page fault", regs, error_code, 14,
SIGSEGV) == NOTIFY_STOP)
return;
force_sig_info(SIGBUS, &info, tsk);
return;
}
+
+static int __init enable_pagefaulttrace(char *str)
+{
+ page_fault_trace = 1;
+ return 0;
+}
+__setup("pagefaulttrace", enable_pagefaulttrace);
#include <linux/pagemap.h>
#include <linux/bootmem.h>
#include <linux/proc_fs.h>
+#include <linux/pci.h>
#include <asm/processor.h>
#include <asm/system.h>
#include <asm/mmu_context.h>
#include <asm/proto.h>
#include <asm/smp.h>
+#include <asm/sections.h>
#include <xen/features.h>
extern void swiotlb_init(void);
#endif
-extern char _stext[];
+static unsigned long dma_reserve __initdata;
DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
extern unsigned long start_pfn;
/* References to section boundaries */
-extern char _text, _etext, _edata, __bss_start, _end[];
-extern char __init_begin, __init_end;
-
int after_bootmem;
static void *spp_getpage(void)
void __init xen_init_pt(void)
{
unsigned long addr, *page;
- int i;
-
- for (i = 0; i < NR_CPUS; i++)
- per_cpu(cur_pgd, i) = init_mm.pgd;
memset((void *)init_level4_pgt, 0, PAGE_SIZE);
memset((void *)level3_kernel_pgt, 0, PAGE_SIZE);
__flush_tlb_all();
}
-extern struct x8664_pda cpu_pda[NR_CPUS];
-
-void zap_low_mappings(void)
+void __cpuinit zap_low_mappings(int cpu)
{
/* this is not required for Xen */
#if 0
#endif
}
-#ifndef CONFIG_NUMA
-void __init paging_init(void)
+/* Compute zone sizes for the DMA and DMA32 zones in a node. */
+__init void
+size_zones(unsigned long *z, unsigned long *h,
+ unsigned long start_pfn, unsigned long end_pfn)
{
- {
- unsigned long zones_size[MAX_NR_ZONES];
- unsigned long holes[MAX_NR_ZONES];
- /* unsigned int max_dma; */
+ int i;
+#ifndef CONFIG_XEN
+ unsigned long w;
+#endif
- memset(zones_size, 0, sizeof(zones_size));
- memset(holes, 0, sizeof(holes));
+ for (i = 0; i < MAX_NR_ZONES; i++)
+ z[i] = 0;
+
+#ifndef CONFIG_XEN
+ if (start_pfn < MAX_DMA_PFN)
+ z[ZONE_DMA] = MAX_DMA_PFN - start_pfn;
+ if (start_pfn < MAX_DMA32_PFN) {
+ unsigned long dma32_pfn = MAX_DMA32_PFN;
+ if (dma32_pfn > end_pfn)
+ dma32_pfn = end_pfn;
+ z[ZONE_DMA32] = dma32_pfn - start_pfn;
+ }
+ z[ZONE_NORMAL] = end_pfn - start_pfn;
+
+ /* Remove lower zones from higher ones. */
+ w = 0;
+ for (i = 0; i < MAX_NR_ZONES; i++) {
+ if (z[i])
+ z[i] -= w;
+ w += z[i];
+ }
- /* max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; */
- /* if (end_pfn < max_dma) { */
- zones_size[ZONE_DMA] = end_pfn;
-#if 0
- holes[ZONE_DMA] = e820_hole_size(0, end_pfn);
- } else {
- zones_size[ZONE_DMA] = max_dma;
- holes[ZONE_DMA] = e820_hole_size(0, max_dma);
- zones_size[ZONE_NORMAL] = end_pfn - max_dma;
- holes[ZONE_NORMAL] = e820_hole_size(max_dma, end_pfn);
+ /* Compute holes */
+ w = start_pfn;
+ for (i = 0; i < MAX_NR_ZONES; i++) {
+ unsigned long s = w;
+ w += z[i];
+ h[i] = e820_hole_size(s, w);
+ }
+
+ /* Add the space pace needed for mem_map to the holes too. */
+ for (i = 0; i < MAX_NR_ZONES; i++)
+ h[i] += (z[i] * sizeof(struct page)) / PAGE_SIZE;
+
+ /* The 16MB DMA zone has the kernel and other misc mappings.
+ Account them too */
+ if (h[ZONE_DMA]) {
+ h[ZONE_DMA] += dma_reserve;
+ if (h[ZONE_DMA] >= z[ZONE_DMA]) {
+ printk(KERN_WARNING
+ "Kernel too large and filling up ZONE_DMA?\n");
+ h[ZONE_DMA] = z[ZONE_DMA];
}
-#endif
- free_area_init_node(0, NODE_DATA(0), zones_size,
- __pa(PAGE_OFFSET) >> PAGE_SHIFT, holes);
}
+#else
+ z[ZONE_DMA] = end_pfn;
+ for (i = 0; i < MAX_NR_ZONES; i++)
+ h[i] = 0;
+#endif
+}
+
+#ifndef CONFIG_NUMA
+void __init paging_init(void)
+{
+ unsigned long zones[MAX_NR_ZONES], holes[MAX_NR_ZONES];
+ size_zones(zones, holes, 0, end_pfn);
+ free_area_init_node(0, NODE_DATA(0), zones,
+ __pa(PAGE_OFFSET) >> PAGE_SHIFT, holes);
set_fixmap(FIX_SHARED_INFO, xen_start_info->shared_info);
HYPERVISOR_shared_info = (shared_info_t *)fix_to_virt(FIX_SHARED_INFO);
datasize >> 10,
initsize >> 10);
+#ifndef CONFIG_XEN
+#ifdef CONFIG_SMP
/*
- * Subtle. SMP is doing its boot stuff late (because it has to
- * fork idle threads) - but it also needs low mappings for the
- * protected-mode entry to work. We zap these entries only after
- * the WP-bit has been tested.
+ * Sync boot_level4_pgt mappings with the init_level4_pgt
+ * except for the low identity mappings which are already zapped
+ * in init_level4_pgt. This sync-up is essential for AP's bringup
*/
-#ifndef CONFIG_SMP
- zap_low_mappings();
+ memcpy(boot_level4_pgt+1, init_level4_pgt+1, (PTRS_PER_PGD-1)*sizeof(pgd_t));
+#endif
#endif
}
-extern char __initdata_begin[], __initdata_end[];
-
void free_initmem(void)
{
#ifdef __DO_LATER__
totalram_pages++;
}
memset(__initdata_begin, 0xba, __initdata_end - __initdata_begin);
- printk ("Freeing unused kernel memory: %luk freed\n", (&__init_end - &__init_begin) >> 10);
+ printk ("Freeing unused kernel memory: %luk freed\n", (__init_end - __init_begin) >> 10);
#endif
}
#else
reserve_bootmem(phys, len);
#endif
+ if (phys+len <= MAX_DMA_PFN*PAGE_SIZE)
+ dma_reserve += len / PAGE_SIZE;
}
int kern_addr_valid(unsigned long addr)
static ctl_table debug_table2[] = {
{ 99, "exception-trace", &exception_trace, sizeof(int), 0644, NULL,
proc_dointvec },
-#ifdef CONFIG_CHECKING
- { 100, "page-fault-trace", &page_fault_trace, sizeof(int), 0644, NULL,
- proc_dointvec },
-#endif
{ 0, }
};
obj-$(CONFIG_ACPI) += acpi.o
obj-y += legacy.o irq.o common.o
# mmconfig has a 64bit special
-obj-$(CONFIG_PCI_MMCONFIG) += mmconfig.o
+obj-$(CONFIG_PCI_MMCONFIG) += mmconfig.o direct.o
obj-$(CONFIG_NUMA) += k8-bus.o
obj-$(CONFIG_PCI) += pci/
obj-$(CONFIG_PARISC) += parisc/
+obj-$(CONFIG_RAPIDIO) += rapidio/
obj-y += video/
obj-$(CONFIG_ACPI) += acpi/
# PnP must come after ACPI since it will eventually need to check if acpi
obj-$(CONFIG_PARIDE) += block/paride/
obj-$(CONFIG_TC) += tc/
obj-$(CONFIG_USB) += usb/
+obj-$(CONFIG_PCI) += usb/
obj-$(CONFIG_USB_GADGET) += usb/gadget/
obj-$(CONFIG_GAMEPORT) += input/gameport/
obj-$(CONFIG_INPUT) += input/
obj-$(CONFIG_SGI_IOC4) += sn/
obj-y += firmware/
obj-$(CONFIG_CRYPTO) += crypto/
+obj-$(CONFIG_SUPERH) += sh/
config ACPI_IBM
tristate "IBM ThinkPad Laptop Extras"
depends on X86
- default y
---help---
This is a Linux ACPI driver for the IBM ThinkPad laptops. It adds
support for Fn-Fx key combinations, Bluetooth control, video
static int mmap_mem(struct file * file, struct vm_area_struct * vma)
{
#if defined(__HAVE_PHYS_MEM_ACCESS_PROT)
- unsigned long offset = vma->vm_pgoff << PAGE_SHIFT;
-
- vma->vm_page_prot = phys_mem_access_prot(file, offset,
+ vma->vm_page_prot = phys_mem_access_prot(file, vma->vm_pgoff,
vma->vm_end - vma->vm_start,
vma->vm_page_prot);
#elif defined(pgprot_noncached)
mem_class = class_create(THIS_MODULE, "mem");
for (i = 0; i < ARRAY_SIZE(devlist); i++) {
- class_device_create(mem_class, MKDEV(MEM_MAJOR, devlist[i].minor),
+ class_device_create(mem_class, NULL,
+ MKDEV(MEM_MAJOR, devlist[i].minor),
NULL, devlist[i].name);
devfs_mk_cdev(MKDEV(MEM_MAJOR, devlist[i].minor),
S_IFCHR | devlist[i].mode, devlist[i].name);
config TCG_TPM
tristate "TPM Hardware Support"
- depends on EXPERIMENTAL && (PCI || XEN)
+ depends on EXPERIMENTAL
---help---
If you have a TPM security chip in your system, which
implements the Trusted Computing Group's specification,
{
struct tpm_chip *chip = (struct tpm_chip *) ptr;
+ schedule_work(&chip->work);
+}
+
+static void timeout_work(void * ptr)
+{
+ struct tpm_chip *chip = ptr;
+
down(&chip->buffer_mutex);
atomic_set(&chip->data_pending, 0);
memset(chip->data_buffer, 0, chip->vendor->buffersize);
__be32 index;
char *str = buf;
- struct tpm_chip *chip =
- pci_get_drvdata(to_pci_dev(dev));
+ struct tpm_chip *chip = dev_get_drvdata(dev);
if (chip == NULL)
return -ENODEV;
< READ_PCR_RESULT_SIZE){
dev_dbg(chip->dev, "A TPM error (%d) occurred"
" attempting to read PCR %d of %d\n",
- be32_to_cpu(*((__be32 *) (data + 6))), i, num_pcrs);
+ be32_to_cpu(*((__be32 *) (data + 6))),
+ i, num_pcrs);
goto out;
}
str += sprintf(str, "PCR-%02d: ", i);
int i, rc;
char *str = buf;
- struct tpm_chip *chip =
- pci_get_drvdata(to_pci_dev(dev));
+ struct tpm_chip *chip = dev_get_drvdata(dev);
if (chip == NULL)
return -ENODEV;
- data = kmalloc(READ_PUBEK_RESULT_SIZE, GFP_KERNEL);
+ data = kzalloc(READ_PUBEK_RESULT_SIZE, GFP_KERNEL);
if (!data)
return -ENOMEM;
memcpy(data, readpubek, sizeof(readpubek));
- memset(data + sizeof(readpubek), 0, 20); /* zero nonce */
if ((len = tpm_transmit(chip, data, READ_PUBEK_RESULT_SIZE)) <
READ_PUBEK_RESULT_SIZE) {
kfree(data);
return rc;
}
-
EXPORT_SYMBOL_GPL(tpm_show_pubek);
#define CAP_VER_RESULT_SIZE 18
ssize_t len;
char *str = buf;
- struct tpm_chip *chip =
- pci_get_drvdata(to_pci_dev(dev));
+ struct tpm_chip *chip = dev_get_drvdata(dev);
if (chip == NULL)
return -ENODEV;
}
EXPORT_SYMBOL_GPL(tpm_store_cancel);
-
/*
* Device file system interface to the TPM
*/
}
if (chip->num_opens) {
- dev_dbg(chip->dev,
- "Another process owns this TPM\n");
+ dev_dbg(chip->dev, "Another process owns this TPM\n");
rc = -EBUSY;
goto err_out;
}
spin_unlock(&driver_lock);
return rc;
}
-
EXPORT_SYMBOL_GPL(tpm_open);
int tpm_release(struct inode *inode, struct file *file)
file->private_data = NULL;
chip->num_opens--;
del_singleshot_timer_sync(&chip->user_read_timer);
+ flush_scheduled_work();
atomic_set(&chip->data_pending, 0);
put_device(chip->dev);
kfree(chip->data_buffer);
spin_unlock(&driver_lock);
return 0;
}
-
EXPORT_SYMBOL_GPL(tpm_release);
-ssize_t tpm_write(struct file * file, const char __user * buf,
+ssize_t tpm_write(struct file *file, const char __user *buf,
size_t size, loff_t * off)
{
struct tpm_chip *chip = file->private_data;
EXPORT_SYMBOL_GPL(tpm_write);
-ssize_t tpm_read(struct file * file, char __user * buf,
+ssize_t tpm_read(struct file * file, char __user *buf,
size_t size, loff_t * off)
{
struct tpm_chip *chip = file->private_data;
int ret_size;
int pos, pending = 0;
+ flush_scheduled_work();
ret_size = atomic_read(&chip->data_pending);
if (ret_size > 0) { /* relay data */
if (size < ret_size)
pos = atomic_read(&chip->data_position);
down(&chip->buffer_mutex);
- if (copy_to_user
- ((void __user *) buf, &chip->data_buffer[pos], ret_size)) {
+ if (copy_to_user(buf, &chip->data_buffer[pos], ret_size)) {
ret_size = -EFAULT;
} else {
pending = atomic_read(&chip->data_pending) - ret_size;
return ret_size;
}
-
EXPORT_SYMBOL_GPL(tpm_read);
void tpm_remove_hardware(struct device *dev)
sysfs_remove_group(&dev->kobj, chip->vendor->attr_group);
- dev_mask[chip->dev_num / TPM_NUM_MASK_ENTRIES ] &= !(1 << (chip->dev_num % TPM_NUM_MASK_ENTRIES));
+ dev_mask[chip->dev_num / TPM_NUM_MASK_ENTRIES ] &=
+ ~(1 << (chip->dev_num % TPM_NUM_MASK_ENTRIES));
kfree(chip);
put_device(dev);
}
-
EXPORT_SYMBOL_GPL(tpm_remove_hardware);
static u8 savestate[] = {
* We are about to suspend. Save the TPM state
* so that it can be restored.
*/
-int tpm_pm_suspend(struct pci_dev *pci_dev, pm_message_t pm_state)
+int tpm_pm_suspend(struct device *dev, pm_message_t pm_state)
{
- struct tpm_chip *chip = pci_get_drvdata(pci_dev);
+ struct tpm_chip *chip = dev_get_drvdata(dev);
if (chip == NULL)
return -ENODEV;
tpm_transmit(chip, savestate, sizeof(savestate));
return 0;
}
-
EXPORT_SYMBOL_GPL(tpm_pm_suspend);
/*
* Resume from a power safe. The BIOS already restored
* the TPM state.
*/
-int tpm_pm_resume(struct pci_dev *pci_dev)
+int tpm_pm_resume(struct device *dev)
{
- struct tpm_chip *chip = pci_get_drvdata(pci_dev);
+ struct tpm_chip *chip = dev_get_drvdata(dev);
if (chip == NULL)
return -ENODEV;
return 0;
}
-
EXPORT_SYMBOL_GPL(tpm_pm_resume);
/*
* upon errant exit from this function specific probe function should call
* pci_disable_device
*/
-int tpm_register_hardware(struct device *dev,
- struct tpm_vendor_specific *entry)
+int tpm_register_hardware(struct device *dev, struct tpm_vendor_specific *entry)
{
#define DEVNAME_SIZE 7
int i, j;
/* Driver specific per-device data */
- chip = kmalloc(sizeof(*chip), GFP_KERNEL);
+ chip = kzalloc(sizeof(*chip), GFP_KERNEL);
if (chip == NULL)
return -ENOMEM;
- memset(chip, 0, sizeof(struct tpm_chip));
-
init_MUTEX(&chip->buffer_mutex);
init_MUTEX(&chip->tpm_mutex);
INIT_LIST_HEAD(&chip->list);
+ INIT_WORK(&chip->work, timeout_work, chip);
+
init_timer(&chip->user_read_timer);
chip->user_read_timer.function = user_reader_timeout;
chip->user_read_timer.data = (unsigned long) chip;
dev_num_search_complete:
if (chip->dev_num < 0) {
- dev_err(dev,
- "No available tpm device numbers\n");
+ dev_err(dev, "No available tpm device numbers\n");
kfree(chip);
return -ENODEV;
} else if (chip->dev_num == 0)
return 0;
}
-
EXPORT_SYMBOL_GPL(tpm_register_hardware);
MODULE_AUTHOR("Leendert van Doorn (leendert@watson.ibm.com)");
*
*/
#include <linux/module.h>
-#include <linux/version.h>
#include <linux/pci.h>
#include <linux/delay.h>
#include <linux/fs.h>
#include <linux/miscdevice.h>
+#include <linux/platform_device.h>
enum tpm_timeout {
TPM_TIMEOUT = 5, /* msecs */
struct semaphore buffer_mutex;
struct timer_list user_read_timer; /* user needs to claim result */
+ struct work_struct work;
struct semaphore tpm_mutex; /* tpm is processing */
struct tpm_vendor_specific *vendor;
*/
#include "tpm.h"
-
-/* Atmel definitions */
-enum tpm_atmel_addr {
- TPM_ATMEL_BASE_ADDR_LO = 0x08,
- TPM_ATMEL_BASE_ADDR_HI = 0x09
-};
+#include "tpm_atmel.h"
/* write status bits */
enum tpm_atmel_write_status {
ATML_STATUS_READY = 0x08
};
-static int tpm_atml_recv(struct tpm_chip *chip, u8 * buf, size_t count)
+static int tpm_atml_recv(struct tpm_chip *chip, u8 *buf, size_t count)
{
u8 status, *hdr = buf;
u32 size;
return -EIO;
for (i = 0; i < 6; i++) {
- status = inb(chip->vendor->base + 1);
+ status = atmel_getb(chip, 1);
if ((status & ATML_STATUS_DATA_AVAIL) == 0) {
- dev_err(chip->dev,
- "error reading header\n");
+ dev_err(chip->dev, "error reading header\n");
return -EIO;
}
- *buf++ = inb(chip->vendor->base);
+ *buf++ = atmel_getb(chip, 0);
}
/* size of the data received */
dev_err(chip->dev,
"Recv size(%d) less than available space\n", size);
for (; i < size; i++) { /* clear the waiting data anyway */
- status = inb(chip->vendor->base + 1);
+ status = atmel_getb(chip, 1);
if ((status & ATML_STATUS_DATA_AVAIL) == 0) {
- dev_err(chip->dev,
- "error reading data\n");
+ dev_err(chip->dev, "error reading data\n");
return -EIO;
}
}
/* read all the data available */
for (; i < size; i++) {
- status = inb(chip->vendor->base + 1);
+ status = atmel_getb(chip, 1);
if ((status & ATML_STATUS_DATA_AVAIL) == 0) {
- dev_err(chip->dev,
- "error reading data\n");
+ dev_err(chip->dev, "error reading data\n");
return -EIO;
}
- *buf++ = inb(chip->vendor->base);
+ *buf++ = atmel_getb(chip, 0);
}
/* make sure data available is gone */
- status = inb(chip->vendor->base + 1);
+ status = atmel_getb(chip, 1);
if (status & ATML_STATUS_DATA_AVAIL) {
dev_err(chip->dev, "data available is stuck\n");
return -EIO;
return size;
}
-static int tpm_atml_send(struct tpm_chip *chip, u8 * buf, size_t count)
+static int tpm_atml_send(struct tpm_chip *chip, u8 *buf, size_t count)
{
int i;
dev_dbg(chip->dev, "tpm_atml_send:\n");
for (i = 0; i < count; i++) {
dev_dbg(chip->dev, "%d 0x%x(%d)\n", i, buf[i], buf[i]);
- outb(buf[i], chip->vendor->base);
+ atmel_putb(buf[i], chip, 0);
}
return count;
static void tpm_atml_cancel(struct tpm_chip *chip)
{
- outb(ATML_STATUS_ABORT, chip->vendor->base + 1);
+ atmel_putb(ATML_STATUS_ABORT, chip, 1);
}
static u8 tpm_atml_status(struct tpm_chip *chip)
{
- return inb(chip->vendor->base + 1);
+ return atmel_getb(chip, 1);
}
static struct file_operations atmel_ops = {
&dev_attr_pcrs.attr,
&dev_attr_caps.attr,
&dev_attr_cancel.attr,
- 0,
+ NULL,
};
static struct attribute_group atmel_attr_grp = { .attrs = atmel_attrs };
.miscdev = { .fops = &atmel_ops, },
};
-static int __devinit tpm_atml_init(struct pci_dev *pci_dev,
- const struct pci_device_id *pci_id)
+static struct platform_device *pdev;
+
+static void atml_plat_remove(void)
{
- u8 version[4];
- int rc = 0;
- int lo, hi;
+ struct tpm_chip *chip = dev_get_drvdata(&pdev->dev);
- if (pci_enable_device(pci_dev))
- return -EIO;
+ if (chip) {
+ if (chip->vendor->have_region)
+ atmel_release_region(chip->vendor->base,
+ chip->vendor->region_size);
+ atmel_put_base_addr(chip->vendor);
+ tpm_remove_hardware(chip->dev);
+ platform_device_unregister(pdev);
+ }
+}
+
+static struct device_driver atml_drv = {
+ .name = "tpm_atmel",
+ .bus = &platform_bus_type,
+ .owner = THIS_MODULE,
+ .suspend = tpm_pm_suspend,
+ .resume = tpm_pm_resume,
+};
- lo = tpm_read_index(TPM_ADDR, TPM_ATMEL_BASE_ADDR_LO);
- hi = tpm_read_index(TPM_ADDR, TPM_ATMEL_BASE_ADDR_HI);
+static int __init init_atmel(void)
+{
+ int rc = 0;
- tpm_atmel.base = (hi<<8)|lo;
- dev_dbg( &pci_dev->dev, "Operating with base: 0x%x\n", tpm_atmel.base);
+ driver_register(&atml_drv);
- /* verify that it is an Atmel part */
- if (tpm_read_index(TPM_ADDR, 4) != 'A' || tpm_read_index(TPM_ADDR, 5) != 'T'
- || tpm_read_index(TPM_ADDR, 6) != 'M' || tpm_read_index(TPM_ADDR, 7) != 'L') {
+ if ((tpm_atmel.iobase = atmel_get_base_addr(&tpm_atmel)) == NULL) {
rc = -ENODEV;
- goto out_err;
+ goto err_unreg_drv;
}
/* query chip for its version number */
check_tty_count(tty, "do_tty_hangup");
file_list_lock();
/* This breaks for file handles being sent over AF_UNIX sockets ? */
- list_for_each_entry(filp, &tty->tty_files, f_list) {
+ list_for_each_entry(filp, &tty->tty_files, f_u.fu_list) {
if (filp->f_op->write == redirected_tty_write)
cons_filp = filp;
if (filp->f_op->write != tty_write)
/* Release locally allocated memory ... nothing placed in slots */
free_mem_out:
- if (o_tp)
- kfree(o_tp);
+ kfree(o_tp);
if (o_tty)
free_tty_struct(o_tty);
- if (ltp)
- kfree(ltp);
- if (tp)
- kfree(tp);
+ kfree(ltp);
+ kfree(tp);
free_tty_struct(tty);
fail_no_mem:
pty_line_name(driver, index, name);
else
tty_line_name(driver, index, name);
- class_device_create(tty_class, dev, device, name);
+ class_device_create(tty_class, NULL, dev, device, "%s", name);
}
/**
register_chrdev_region(MKDEV(TTYAUX_MAJOR, 0), 1, "/dev/tty") < 0)
panic("Couldn't register /dev/tty driver\n");
devfs_mk_cdev(MKDEV(TTYAUX_MAJOR, 0), S_IFCHR|S_IRUGO|S_IWUGO, "tty");
- class_device_create(tty_class, MKDEV(TTYAUX_MAJOR, 0), NULL, "tty");
+ class_device_create(tty_class, NULL, MKDEV(TTYAUX_MAJOR, 0), NULL, "tty");
cdev_init(&console_cdev, &console_fops);
if (cdev_add(&console_cdev, MKDEV(TTYAUX_MAJOR, 1), 1) ||
register_chrdev_region(MKDEV(TTYAUX_MAJOR, 1), 1, "/dev/console") < 0)
panic("Couldn't register /dev/console driver\n");
devfs_mk_cdev(MKDEV(TTYAUX_MAJOR, 1), S_IFCHR|S_IRUSR|S_IWUSR, "console");
- class_device_create(tty_class, MKDEV(TTYAUX_MAJOR, 1), NULL, "console");
+ class_device_create(tty_class, NULL, MKDEV(TTYAUX_MAJOR, 1), NULL, "console");
#ifdef CONFIG_UNIX98_PTYS
cdev_init(&ptmx_cdev, &ptmx_fops);
register_chrdev_region(MKDEV(TTYAUX_MAJOR, 2), 1, "/dev/ptmx") < 0)
panic("Couldn't register /dev/ptmx driver\n");
devfs_mk_cdev(MKDEV(TTYAUX_MAJOR, 2), S_IFCHR|S_IRUGO|S_IWUGO, "ptmx");
- class_device_create(tty_class, MKDEV(TTYAUX_MAJOR, 2), NULL, "ptmx");
+ class_device_create(tty_class, NULL, MKDEV(TTYAUX_MAJOR, 2), NULL, "ptmx");
#endif
#ifdef CONFIG_VT
register_chrdev_region(MKDEV(TTY_MAJOR, 0), 1, "/dev/vc/0") < 0)
panic("Couldn't register /dev/tty0 driver\n");
devfs_mk_cdev(MKDEV(TTY_MAJOR, 0), S_IFCHR|S_IRUSR|S_IWUSR, "vc/0");
- class_device_create(tty_class, MKDEV(TTY_MAJOR, 0), NULL, "tty0");
+ class_device_create(tty_class, NULL, MKDEV(TTY_MAJOR, 0), NULL, "tty0");
vty_init();
out_vt:
config DELL_RBU
tristate "BIOS update support for DELL systems via sysfs"
+ depends on X86
select FW_LOADER
help
Say m if you want to have the option of updating the BIOS for your
config DCDBAS
tristate "Dell Systems Management Base Driver"
- depends on X86 || X86_64
- default m
+ depends on X86
help
The Dell Systems Management Base Driver provides a sysfs interface
for systems management software to perform System Management
# The new 8250/16550 serial drivers
config SERIAL_8250
tristate "8250/16550 and compatible serial support"
- depends on (BROKEN || !(SPARC64 || SPARC32 || XEN_DISABLE_SERIAL))
+ depends on (BROKEN || !SPARC)
+ depends on !XEN_DISABLE_SERIAL
select SERIAL_CORE
---help---
This selects whether you want to include the driver for the standard
system, say Y to this option. The driver can handle 1, 2, or 3 port
cards. If unsure, say N.
+config SERIAL_8250_AU1X00
+ bool "AU1X00 serial port support"
+ depends on SERIAL_8250 != n && SOC_AU1X00
+ help
+ If you have an Au1x00 board and want to use the serial port, say Y
+ to this option. The driver can handle 1 or 2 serial ports.
+ If unsure, say N.
+
comment "Non-8250 serial port support"
config SERIAL_AMBA_PL010
config SERIAL_SUNCORE
bool
- depends on SPARC32 || SPARC64
+ depends on SPARC
select SERIAL_CORE
select SERIAL_CORE_CONSOLE
default y
config SERIAL_SUNZILOG
tristate "Sun Zilog8530 serial support"
- depends on SPARC32 || SPARC64
+ depends on SPARC
help
This driver supports the Zilog8530 serial ports found on many Sparc
systems. Say Y or M if you want to be able to these serial ports.
config SERIAL_SUNSU
tristate "Sun SU serial support"
- depends on (SPARC32 || SPARC64) && PCI
+ depends on SPARC && PCI
help
This driver supports the 8250 serial ports that run the keyboard and
mouse on (PCI) UltraSPARC systems. Say Y or M if you want to be able
config SERIAL_MUX
tristate "Serial MUX support"
- depends on PARISC
+ depends on GSC
select SERIAL_CORE
default y
---help---
config SERIAL_SUNSAB
tristate "Sun Siemens SAB82532 serial support"
- depends on (SPARC32 || SPARC64) && PCI
+ depends on SPARC && PCI
help
This driver supports the Siemens SAB82532 DUSCC serial ports on newer
(PCI) UltraSPARC systems. Say Y or M if you want to be able to these
static struct timer_list balloon_timer;
/* Use the private and mapping fields of struct page as a list. */
-#define PAGE_TO_LIST(p) ((struct list_head *)&p->private)
+#define PAGE_TO_LIST(p) ((struct list_head *)&p->u.private)
#define LIST_TO_PAGE(l) \
- (list_entry(((unsigned long *)l), struct page, private))
+ (list_entry(((unsigned long *)l), struct page, u.private))
#define UNLIST_PAGE(p) \
do { \
list_del(PAGE_TO_LIST(p)); \
p->mapping = NULL; \
- p->private = 0; \
+ p->u.private = 0; \
} while(0)
#define IPRINTK(fmt, args...) \
static void cpu_bringup(void)
{
- if (!cpu_isset(smp_processor_id(), cpu_initialized))
+ if (!cpu_isset(smp_processor_id(), cpu_initialized)) {
cpu_init();
+ preempt_disable();
+ }
local_irq_enable();
cpu_idle();
}
config HUGETLBFS
bool "HugeTLB file system support"
- depends X86 || IA64 || PPC64 || SPARC64 || SUPERH || X86_64 || BROKEN
+ depends X86 || IA64 || PPC64 || SPARC64 || SUPERH || BROKEN
depends !XEN
config HUGETLB_PAGE
config HFS_FS
tristate "Apple Macintosh file system support (EXPERIMENTAL)"
depends on EXPERIMENTAL
+ select NLS
help
If you say Y here, you will be able to mount Macintosh-formatted
floppy disks and hard drive partitions with full read-write access.
- NOR flash with transparent ECC
- DataFlash
+config JFFS2_SUMMARY
+ bool "JFFS2 summary support (EXPERIMENTAL)"
+ depends on JFFS2_FS && EXPERIMENTAL
+ default n
+ help
+ This feature makes it possible to use summary information
+ for faster filesystem mount.
+
+ The summary information can be inserted into a filesystem image
+ by the utility 'sumtool'.
+
+ If unsure, say 'N'.
+
config JFFS2_COMPRESSION_OPTIONS
bool "Advanced compression options for JFFS2"
depends on JFFS2_FS
default y
help
Zlib is designed to be a free, general-purpose, legally unencumbered,
- lossless data-compression library for use on virtually any computer
+ lossless data-compression library for use on virtually any computer
hardware and operating system. See <http://www.gzip.org/zlib/> for
further information.
-
+
Say 'Y' if unsure.
config JFFS2_RTIME
default JFFS2_CMODE_PRIORITY
depends on JFFS2_FS
help
- You can set here the default compression mode of JFFS2 from
+ You can set here the default compression mode of JFFS2 from
the available compression modes. Don't touch if unsure.
config JFFS2_CMODE_NONE
config JFFS2_CMODE_PRIORITY
bool "priority"
help
- Tries the compressors in a predefinied order and chooses the first
+ Tries the compressors in a predefinied order and chooses the first
successful one.
config JFFS2_CMODE_SIZE
bool "size (EXPERIMENTAL)"
help
- Tries all compressors and chooses the one which has the smallest
+ Tries all compressors and chooses the one which has the smallest
result.
endchoice
PC operating systems. The CIFS protocol is fully supported by
file servers such as Windows 2000 (including Windows 2003, NT 4
and Windows XP) as well by Samba (which provides excellent CIFS
- server support for Linux and many other operating systems). Currently
- you must use the smbfs client filesystem to access older SMB servers
- such as Windows 9x and OS/2.
+ server support for Linux and many other operating systems). Limited
+ support for Windows ME and similar servers is provided as well.
+ You must use the smbfs client filesystem to access older SMB servers
+ such as OS/2 and DOS.
The intent of the cifs module is to provide an advanced
network file system client for mounting to CIFS compliant servers,
cifs if running only a (Samba) server. It is possible to enable both
smbfs and cifs (e.g. if you are using CIFS for accessing Windows 2003
and Samba 3 servers, and smbfs for accessing old servers). If you need
- to mount to Samba or Windows 2003 servers from this machine, say Y.
+ to mount to Samba or Windows from this machine, say Y.
config CIFS_STATS
bool "CIFS statistics"
Enabling this option will cause statistics for each server share
mounted by the cifs client to be displayed in /proc/fs/cifs/Stats
+config CIFS_STATS2
+ bool "CIFS extended statistics"
+ depends on CIFS_STATS
+ help
+ Enabling this option will allow more detailed statistics on SMB
+ request timing to be displayed in /proc/fs/cifs/DebugData and also
+ allow optional logging of slow responses to dmesg (depending on the
+ value of /proc/fs/cifs/cifsFYI, see fs/cifs/README for more details).
+ These additional statistics may have a minor effect on performance
+ and memory utilization.
+
+ Unless you are a developer or are doing network performance analysis
+ or tuning, say N.
+
config CIFS_XATTR
- bool "CIFS extended attributes (EXPERIMENTAL)"
+ bool "CIFS extended attributes"
depends on CIFS
help
Extended attributes are name:value pairs associated with inodes by
prefaced by the user namespace prefix. The system namespace
(used by some filesystems to store ACLs) is not supported at
this time.
-
+
If unsure, say N.
config CIFS_POSIX
- bool "CIFS POSIX Extensions (EXPERIMENTAL)"
+ bool "CIFS POSIX Extensions"
depends on CIFS_XATTR
help
Enabling this option will cause the cifs client to attempt to
config CIFS_EXPERIMENTAL
bool "CIFS Experimental Features (EXPERIMENTAL)"
- depends on CIFS
+ depends on CIFS && EXPERIMENTAL
+ help
+ Enables cifs features under testing. These features are
+ experimental and currently include support for writepages
+ (multipage writebehind performance improvements) and directory
+ change notification ie fcntl(F_DNOTIFY) as well as some security
+ improvements. Some also depend on setting at runtime the
+ pseudo-file /proc/fs/cifs/Experimental (which is disabled by
+ default). See the file fs/cifs/README for more details.
+
+ If unsure, say N.
+
+config CIFS_UPCALL
+ bool "CIFS Kerberos/SPNEGO advanced session setup (EXPERIMENTAL)"
+ depends on CIFS_EXPERIMENTAL
+ select CONNECTOR
help
- Enables cifs features under testing. These features
- are highly experimental. If unsure, say N.
+ Enables an upcall mechanism for CIFS which will be used to contact
+ userspace helper utilities to provide SPNEGO packaged Kerberos
+ tickets which are needed to mount to certain secure servers
+ (for which more secure Kerberos authentication is required). If
+ unsure, say N.
config NCP_FS
tristate "NCP file system support (to mount NetWare volumes)"
return atomic_add_return(-i,v);
}
+#define atomic_cmpxchg(v, old, new) ((int)cmpxchg(&((v)->counter), old, new))
+
+/**
+ * atomic_add_unless - add unless the number is a given value
+ * @v: pointer of type atomic_t
+ * @a: the amount to add to v...
+ * @u: ...unless v is equal to u.
+ *
+ * Atomically adds @a to @v, so long as it was not @u.
+ * Returns non-zero if @v was not @u, and zero otherwise.
+ */
+#define atomic_add_unless(v, a, u) \
+({ \
+ int c, old; \
+ c = atomic_read(v); \
+ while (c != (u) && (old = atomic_cmpxchg((v), c, c + (a))) != c) \
+ c = old; \
+ c != (u); \
+})
+#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
+
#define atomic_inc_return(v) (atomic_add_return(1,v))
#define atomic_dec_return(v) (atomic_sub_return(1,v))
extern struct desc_struct cpu_gdt_table[NR_CPUS][GDT_ENTRIES];
+#define get_cpu_gdt_table(_cpu) ((struct desc_struct *)cpu_gdt_descr[(_cpu)].address)
+
DECLARE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]);
struct Xgt_desc_struct {
#define store_tr(tr) __asm__ ("str %0":"=mr" (tr))
#define store_ldt(ldt) __asm__ ("sldt %0":"=mr" (ldt))
-#define get_cpu_gdt_table(_cpu) ((struct desc_struct *)cpu_gdt_descr[(_cpu)].address)
-
/*
* This is the ldt that every process will get unless we need
* something other than this.
static inline void set_ldt_desc(unsigned int cpu, void *addr, unsigned int size)
{
- _set_tssldt_desc(&get_cpu_gdt_table(cpu)[GDT_ENTRY_LDT],
- (int)addr, ((size << 3)-1), 0x82);
+ _set_tssldt_desc(&get_cpu_gdt_table(cpu)[GDT_ENTRY_LDT], (int)addr, ((size << 3)-1), 0x82);
}
#define LDT_entry_a(info) \
cpu_set(cpu, next->cpu_vm_mask);
/* Re-load page tables: load_cr3(next->pgd) */
- per_cpu(cur_pgd, cpu) = next->pgd;
op->cmd = MMUEXT_NEW_BASEPTR;
op->arg1.mfn = pfn_to_mfn(__pa(next->pgd) >> PAGE_SHIFT);
op++;
-#include <linux/config.h>
-
#ifndef _ASMi386_PARAM_H
#define _ASMi386_PARAM_H
#ifdef __KERNEL__
+# include <linux/config.h>
# define HZ CONFIG_HZ /* Internal kernel timer frequency */
# define USER_HZ 100 /* .. some user interfaces are in "ticks" */
# define CLOCKS_PER_SEC (USER_HZ) /* like times() */
#define pfn_pte_ma(pfn, prot) __pte_ma(((pfn) << PAGE_SHIFT) | pgprot_val(prot))
#define pfn_pmd(pfn, prot) __pmd(((pfn) << PAGE_SHIFT) | pgprot_val(prot))
-#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT))
-
-#define pmd_page_kernel(pmd) \
-((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
-
/*
* All present user pages are user-executable:
*/
*/
static inline void pud_clear (pud_t * pud) { }
-#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT))
-
-#define pmd_page_kernel(pmd) \
-((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
-
#define pud_page(pud) \
((struct page *) __va(pud_val(pud) & PAGE_MASK))
#include <linux/list.h>
#include <linux/spinlock.h>
+struct mm_struct;
+struct vm_area_struct;
+
/*
* ZERO_PAGE is a global shared page that is always zero: used
* for zero-mapped memory areas etc..
#define pte_present(x) ((x).pte_low & (_PAGE_PRESENT | _PAGE_PROTNONE))
#define pte_clear(mm,addr,xp) do { set_pte_at(mm, addr, xp, __pte(0)); } while (0)
-#define pmd_none(x) (!pmd_val(x))
+/* To avoid harmful races, pmd_none(x) should check only the lower when PAE */
+#define pmd_none(x) (!(unsigned long)pmd_val(x))
/* pmd_present doesn't just test the _PAGE_PRESENT bit since wr.p.t.
can temporarily clear it. */
#define pmd_present(x) (pmd_val(x))
return pte;
}
-#define page_pte(page) page_pte_prot(page, __pgprot(0))
-
#define pmd_large(pmd) \
((pmd_val(pmd) & (_PAGE_PSE|_PAGE_PRESENT)) == (_PAGE_PSE|_PAGE_PRESENT))
#define pte_offset_kernel(dir, address) \
((pte_t *) pmd_page_kernel(*(dir)) + pte_index(address))
+#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT))
+
+#define pmd_page_kernel(pmd) \
+ ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
+
/*
* Helper function that returns the kernel pagetable entry controlling
* the virtual address 'address'. NULL means no pagetable entry present.
int f00f_bug;
int coma_bug;
unsigned long loops_per_jiffy;
- unsigned char x86_num_cores;
+ unsigned char x86_max_cores; /* cpuid returned max cores value */
+ unsigned char booted_cores; /* number of cores as seen by OS */
+ unsigned char apicid;
} __attribute__((__aligned__(SMP_CACHE_BYTES)));
#define X86_VENDOR_INTEL 0
extern struct cpuinfo_x86 new_cpu_data;
extern struct tss_struct doublefault_tss;
DECLARE_PER_CPU(struct tss_struct, init_tss);
-DECLARE_PER_CPU(pgd_t *, cur_pgd);
#ifdef CONFIG_SMP
extern struct cpuinfo_x86 cpu_data[];
#define mtrr_bp_init() do {} while (0)
#endif
+#ifdef CONFIG_X86_MCE
+extern void mcheck_init(struct cpuinfo_x86 *c);
+#else
+#define mcheck_init(c) do {} while(0)
+#endif
+
#endif /* __ASM_I386_PROCESSOR_H */
#define MAX_APICID 256
extern u8 x86_cpu_to_apicid[];
+#define cpu_physical_id(cpu) x86_cpu_to_apicid[cpu]
+
#ifdef CONFIG_HOTPLUG_CPU
extern void cpu_exit_clear(void);
extern void cpu_uninit(void);
extern void __cpu_die(unsigned int cpu);
#endif /* !__ASSEMBLY__ */
+#else /* CONFIG_SMP */
+
+#define cpu_physical_id(cpu) boot_cpu_physical_apicid
+
#define NO_PROC_ID 0xFF /* No processor magic marker */
#endif
#define write_cr0(x) \
__asm__ __volatile__("movl %0,%%cr0": :"r" (x));
-#define read_cr2() ({ \
- unsigned int __dummy; \
- __asm__ __volatile__( \
- "movl %%cr2,%0\n\t" \
- :"=r" (__dummy)); \
- __dummy; \
-})
+#define read_cr2() \
+ (HYPERVISOR_shared_info->vcpu_info[smp_processor_id()].arch.cr2)
#define write_cr2(x) \
__asm__ __volatile__("movl %0,%%cr2": :"r" (x));
-#define read_cr3() per_cpu(cur_pgd, smp_processor_id())
-#define write_cr3(x) do { \
- xen_pt_switch((x)); \
- per_cpu(cur_pgd, smp_processor_id()) = (x); \
-} while (/* CONSTCOND */0)
+#define read_cr3() ({ \
+ unsigned int __dummy; \
+ __asm__ ( \
+ "movl %%cr3,%0\n\t" \
+ :"=r" (__dummy)); \
+ machine_to_phys(__dummy); \
+})
+#define write_cr3(x) ({ \
+ maddr_t __dummy = phys_to_machine(x); \
+ __asm__ __volatile__("movl %0,%%cr3": :"r" (__dummy)); \
+})
#define read_cr4() ({ \
unsigned int __dummy; \
})
#define write_cr4(x) \
__asm__ __volatile__("movl %0,%%cr4": :"r" (x));
-
#define stts() (HYPERVISOR_fpu_taskswitch(1))
#endif /* __KERNEL__ */
#define __xg(x) ((struct __xchg_dummy *)(x))
+#ifdef CONFIG_X86_CMPXCHG64
+
/*
* The semantics of XCHGCMP8B are a bit strange, this is why
* there is a loop and the loading of %%eax and %%edx has to
__set_64bit(ptr, (unsigned int)(value), (unsigned int)((value)>>32ULL) ) : \
__set_64bit(ptr, ll_low(value), ll_high(value)) )
+#endif
+
/*
* Note: no "lock" prefix even on SMP: xchg always implies lock anyway
* Note 2: xchg has side effect, so that attribute volatile is necessary,
#ifdef CONFIG_X86_CMPXCHG
#define __HAVE_ARCH_CMPXCHG 1
+#define cmpxchg(ptr,o,n)\
+ ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\
+ (unsigned long)(n),sizeof(*(ptr))))
#endif
static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
case 2:
__asm__ __volatile__(LOCK "cmpxchgw %w1,%2"
: "=a"(prev)
- : "q"(new), "m"(*__xg(ptr)), "0"(old)
+ : "r"(new), "m"(*__xg(ptr)), "0"(old)
: "memory");
return prev;
case 4:
__asm__ __volatile__(LOCK "cmpxchgl %1,%2"
: "=a"(prev)
- : "q"(new), "m"(*__xg(ptr)), "0"(old)
+ : "r"(new), "m"(*__xg(ptr)), "0"(old)
: "memory");
return prev;
}
return old;
}
-#define cmpxchg(ptr,o,n)\
- ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\
- (unsigned long)(n),sizeof(*(ptr))))
+#ifndef CONFIG_X86_CMPXCHG
+/*
+ * Building a kernel capable running on 80386. It may be necessary to
+ * simulate the cmpxchg on the 80386 CPU. For that purpose we define
+ * a function for each of the sizes we support.
+ */
+
+extern unsigned long cmpxchg_386_u8(volatile void *, u8, u8);
+extern unsigned long cmpxchg_386_u16(volatile void *, u16, u16);
+extern unsigned long cmpxchg_386_u32(volatile void *, u32, u32);
+
+static inline unsigned long cmpxchg_386(volatile void *ptr, unsigned long old,
+ unsigned long new, int size)
+{
+ switch (size) {
+ case 1:
+ return cmpxchg_386_u8(ptr, old, new);
+ case 2:
+ return cmpxchg_386_u16(ptr, old, new);
+ case 4:
+ return cmpxchg_386_u32(ptr, old, new);
+ }
+ return old;
+}
+
+#define cmpxchg(ptr,o,n) \
+({ \
+ __typeof__(*(ptr)) __ret; \
+ if (likely(boot_cpu_data.x86 > 3)) \
+ __ret = __cmpxchg((ptr), (unsigned long)(o), \
+ (unsigned long)(n), sizeof(*(ptr))); \
+ else \
+ __ret = cmpxchg_386((ptr), (unsigned long)(o), \
+ (unsigned long)(n), sizeof(*(ptr))); \
+ __ret; \
+})
+#endif
+
+#ifdef CONFIG_X86_CMPXCHG64
+
+static inline unsigned long long __cmpxchg64(volatile void *ptr, unsigned long long old,
+ unsigned long long new)
+{
+ unsigned long long prev;
+ __asm__ __volatile__(LOCK "cmpxchg8b %3"
+ : "=A"(prev)
+ : "b"((unsigned long)new),
+ "c"((unsigned long)(new >> 32)),
+ "m"(*__xg(ptr)),
+ "0"(old)
+ : "memory");
+ return prev;
+}
+
+#define cmpxchg64(ptr,o,n)\
+ ((__typeof__(*(ptr)))__cmpxchg64((ptr),(unsigned long long)(o),\
+ (unsigned long long)(n)))
+
+#endif
#ifdef __KERNEL__
struct alt_instr {
return tmp+delta;
}
+static inline int rwsem_is_locked(struct rw_semaphore *sem)
+{
+ return (sem->count != 0);
+}
+
#endif /* __KERNEL__ */
#endif /* _I386_RWSEM_H */
#define __xg(x) ((struct __xchg_dummy *)(x))
+#ifdef CONFIG_X86_CMPXCHG64
+
/*
* The semantics of XCHGCMP8B are a bit strange, this is why
* there is a loop and the loading of %%eax and %%edx has to
__set_64bit(ptr, (unsigned int)(value), (unsigned int)((value)>>32ULL) ) : \
__set_64bit(ptr, ll_low(value), ll_high(value)) )
+#endif
+
/*
* Note: no "lock" prefix even on SMP: xchg always implies lock anyway
* Note 2: xchg has side effect, so that attribute volatile is necessary,
#ifdef CONFIG_X86_CMPXCHG
#define __HAVE_ARCH_CMPXCHG 1
+#define cmpxchg(ptr,o,n)\
+ ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\
+ (unsigned long)(n),sizeof(*(ptr))))
#endif
static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
case 2:
__asm__ __volatile__(LOCK "cmpxchgw %w1,%2"
: "=a"(prev)
- : "q"(new), "m"(*__xg(ptr)), "0"(old)
+ : "r"(new), "m"(*__xg(ptr)), "0"(old)
: "memory");
return prev;
case 4:
__asm__ __volatile__(LOCK "cmpxchgl %1,%2"
: "=a"(prev)
- : "q"(new), "m"(*__xg(ptr)), "0"(old)
+ : "r"(new), "m"(*__xg(ptr)), "0"(old)
: "memory");
return prev;
}
return old;
}
-#define cmpxchg(ptr,o,n)\
- ((__typeof__(*(ptr)))__cmpxchg((ptr),(unsigned long)(o),\
- (unsigned long)(n),sizeof(*(ptr))))
+#ifndef CONFIG_X86_CMPXCHG
+/*
+ * Building a kernel capable running on 80386. It may be necessary to
+ * simulate the cmpxchg on the 80386 CPU. For that purpose we define
+ * a function for each of the sizes we support.
+ */
+
+extern unsigned long cmpxchg_386_u8(volatile void *, u8, u8);
+extern unsigned long cmpxchg_386_u16(volatile void *, u16, u16);
+extern unsigned long cmpxchg_386_u32(volatile void *, u32, u32);
+
+static inline unsigned long cmpxchg_386(volatile void *ptr, unsigned long old,
+ unsigned long new, int size)
+{
+ switch (size) {
+ case 1:
+ return cmpxchg_386_u8(ptr, old, new);
+ case 2:
+ return cmpxchg_386_u16(ptr, old, new);
+ case 4:
+ return cmpxchg_386_u32(ptr, old, new);
+ }
+ return old;
+}
+
+#define cmpxchg(ptr,o,n) \
+({ \
+ __typeof__(*(ptr)) __ret; \
+ if (likely(boot_cpu_data.x86 > 3)) \
+ __ret = __cmpxchg((ptr), (unsigned long)(o), \
+ (unsigned long)(n), sizeof(*(ptr))); \
+ else \
+ __ret = cmpxchg_386((ptr), (unsigned long)(o), \
+ (unsigned long)(n), sizeof(*(ptr))); \
+ __ret; \
+})
+#endif
+
+#ifdef CONFIG_X86_CMPXCHG64
+
+static inline unsigned long long __cmpxchg64(volatile void *ptr, unsigned long long old,
+ unsigned long long new)
+{
+ unsigned long long prev;
+ __asm__ __volatile__(LOCK "cmpxchg8b %3"
+ : "=A"(prev)
+ : "b"((unsigned long)new),
+ "c"((unsigned long)(new >> 32)),
+ "m"(*__xg(ptr)),
+ "0"(old)
+ : "memory");
+ return prev;
+}
+
+#define cmpxchg64(ptr,o,n)\
+ ((__typeof__(*(ptr)))__cmpxchg64((ptr),(unsigned long long)(o),\
+ (unsigned long long)(n)))
+
+#endif
#ifdef __KERNEL__
struct alt_instr {
#define pfn_valid(pfn) ((pfn) < max_mapnr)
#define virt_addr_valid(v) pfn_valid(phys_to_pfn(__pa(v)))
-extern struct page *arch_validate(struct page *page, int mask, int order);
+extern struct page *arch_validate(struct page *page, gfp_t mask, int order);
#define HAVE_ARCH_VALIDATE
extern int arch_free_page(struct page *page, int order);
static inline void set_intr_gate(int nr, void *func)
{
+ BUG_ON((unsigned)nr > 0xFF);
_set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 0, 0);
}
static inline void set_intr_gate_ist(int nr, void *func, unsigned ist)
{
+ BUG_ON((unsigned)nr > 0xFF);
_set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 0, ist);
}
static inline void set_system_gate(int nr, void *func)
{
+ BUG_ON((unsigned)nr > 0xFF);
_set_gate(&idt_table[nr], GATE_INTERRUPT, (unsigned long) func, 3, 0);
}
static inline void set_tss_desc(unsigned cpu, void *addr)
{
+ /*
+ * sizeof(unsigned long) coming from an extra "long" at the end
+ * of the iobitmap. See tss_struct definition in processor.h
+ *
+ * -1? seg base+limit should be pointing to the address of the
+ * last valid byte
+ */
set_tssldt_descriptor((struct ldttss_desc *)&get_cpu_gdt_table(cpu)[GDT_ENTRY_TSS],
- (unsigned long)addr,
- DESC_TSS,
- sizeof(struct tss_struct) - 1);
+ (unsigned long)addr, DESC_TSS,
+ IO_BITMAP_OFFSET + IO_BITMAP_BYTES + sizeof(unsigned long) - 1);
}
static inline void set_ldt_desc(unsigned cpu, void *addr, int size)
#define CALL_FUNCTION_VECTOR 0xfc
#define KDB_VECTOR 0xfb /* reserved for KDB */
#define THERMAL_APIC_VECTOR 0xfa
-/* 0xf9 free */
+#define THRESHOLD_APIC_VECTOR 0xf9
#define INVALIDATE_TLB_VECTOR_END 0xf8
#define INVALIDATE_TLB_VECTOR_START 0xf0 /* f0-f8 used for TLB flush */
extern void mm_unpin(struct mm_struct *mm);
void mm_pin_all(void);
+static inline void load_cr3(pgd_t *pgd)
+{
+ asm volatile("movq %0,%%cr3" :: "r" (phys_to_machine(__pa(pgd))) :
+ "memory");
+}
+
static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk)
{
set_bit(cpu, &next->cpu_vm_mask);
/* load_cr3(next->pgd) */
- per_cpu(cur_pgd, smp_processor_id()) = next->pgd;
op->cmd = MMUEXT_NEW_BASEPTR;
op->arg1.mfn = pfn_to_mfn(__pa(next->pgd) >> PAGE_SHIFT);
op++;
#define PAGE_SIZE (1UL << PAGE_SHIFT)
#endif
#define PAGE_MASK (~(PAGE_SIZE-1))
-#define PHYSICAL_PAGE_MASK (~(PAGE_SIZE-1) & (__PHYSICAL_MASK << PAGE_SHIFT))
+#define PHYSICAL_PAGE_MASK (~(PAGE_SIZE-1) & __PHYSICAL_MASK)
#define THREAD_ORDER 1
#ifdef __ASSEMBLY__
-#include <linux/config.h>
-
#ifndef _ASMx86_64_PARAM_H
#define _ASMx86_64_PARAM_H
#ifdef __KERNEL__
+# include <linux/config.h>
# define HZ CONFIG_HZ /* Internal kernel timer frequency */
# define USER_HZ 100 /* .. some user interfaces are in "ticks */
# define CLOCKS_PER_SEC (USER_HZ) /* like times() */
extern pud_t level3_ident_pgt[512];
extern pmd_t level2_kernel_pgt[512];
extern pgd_t init_level4_pgt[];
+extern pgd_t boot_level4_pgt[];
extern unsigned long __supported_pte_mask;
#define swapper_pg_dir init_level4_pgt
}
#endif
+struct mm_struct;
+
static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long addr, pte_t *ptep, int full)
{
pte_t pte;
* and a page entry and page directory to the page they refer to.
*/
-#define page_pte(page) page_pte_prot(page, __pgprot(0))
-
/*
* Level 4 access.
* Never use these in the common code.
#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0)
#define pmd_bad(x) ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_PRESENT)) != (_KERNPG_TABLE & ~_PAGE_PRESENT))
#define pfn_pmd(nr,prot) (__pmd(((nr) << PAGE_SHIFT) | pgprot_val(prot)))
-#define pmd_pfn(x) ((pmd_val(x) >> PAGE_SHIFT) & __PHYSICAL_MASK)
+#define pmd_pfn(x) ((pmd_val(x) & __PHYSICAL_MASK) >> PAGE_SHIFT)
#define pte_to_pgoff(pte) ((pte_val(pte) & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT)
#define pgoff_to_pte(off) ((pte_t) { ((off) << PAGE_SHIFT) | _PAGE_FILE })
int x86_cache_alignment;
int x86_tlbsize; /* number of 4K pages in DTLB/ITLB combined(in pages)*/
__u8 x86_virt_bits, x86_phys_bits;
- __u8 x86_num_cores;
+ __u8 x86_max_cores; /* cpuid returned max cores value */
__u32 x86_power;
__u32 extended_cpuid_level; /* Max extended CPUID function supported */
unsigned long loops_per_jiffy;
+ __u8 apicid;
+ __u8 booted_cores; /* number of cores as seen by OS */
} ____cacheline_aligned;
#define X86_VENDOR_INTEL 0
}
-#define load_cr3(pgdir) do { \
- xen_pt_switch(__pa(pgdir)); \
- per_cpu(cur_pgd, smp_processor_id()) = pgdir; \
-} while (/* CONSTCOND */0)
-
/*
* Bus types
*/
extern struct cpuinfo_x86 boot_cpu_data;
DECLARE_PER_CPU(struct tss_struct,init_tss);
-DECLARE_PER_CPU(pgd_t *, cur_pgd);
#define ARCH_MIN_TASKALIGN 16
extern void unlock_ipi_call_lock(void);
extern int smp_num_siblings;
extern void smp_send_reschedule(int cpu);
-extern void zap_low_mappings(void);
void smp_stop_cpu(void);
extern int smp_call_function_single(int cpuid, void (*func) (void *info),
void *info, int retry, int wait);
extern int __cpu_disable(void);
extern void __cpu_die(unsigned int cpu);
extern void prefill_possible_map(void);
+extern unsigned num_processors;
+extern unsigned disabled_cpus;
#endif /* !ASSEMBLY */
#endif
#endif
+#ifdef CONFIG_SMP
+#define cpu_physical_id(cpu) x86_cpu_to_apicid[cpu]
+#else
+#define cpu_physical_id(cpu) boot_cpu_id
+#endif
+
#endif
#ifdef __KERNEL__
+#ifdef CONFIG_SMP
+#define __vcpu_id smp_processor_id()
+#else
+#define __vcpu_id 0
+#endif
+
#ifdef CONFIG_SMP
#define LOCK_PREFIX "lock ; "
#else
do { \
vcpu_info_t *_vcpu; \
preempt_disable(); \
- _vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()]; \
+ _vcpu = &HYPERVISOR_shared_info->vcpu_info[__vcpu_id]; \
_vcpu->evtchn_upcall_mask = 1; \
preempt_enable_no_resched(); \
barrier(); \
vcpu_info_t *_vcpu; \
barrier(); \
preempt_disable(); \
- _vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()]; \
+ _vcpu = &HYPERVISOR_shared_info->vcpu_info[__vcpu_id]; \
_vcpu->evtchn_upcall_mask = 0; \
barrier(); /* unmask then check (avoid races) */ \
if ( unlikely(_vcpu->evtchn_upcall_pending) ) \
do { \
vcpu_info_t *_vcpu; \
preempt_disable(); \
- _vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()]; \
+ _vcpu = &HYPERVISOR_shared_info->vcpu_info[__vcpu_id]; \
(x) = _vcpu->evtchn_upcall_mask; \
preempt_enable(); \
} while (0)
vcpu_info_t *_vcpu; \
barrier(); \
preempt_disable(); \
- _vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()]; \
+ _vcpu = &HYPERVISOR_shared_info->vcpu_info[__vcpu_id]; \
if ((_vcpu->evtchn_upcall_mask = (x)) == 0) { \
barrier(); /* unmask then check (avoid races) */ \
if ( unlikely(_vcpu->evtchn_upcall_pending) ) \
do { \
vcpu_info_t *_vcpu; \
preempt_disable(); \
- _vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()]; \
+ _vcpu = &HYPERVISOR_shared_info->vcpu_info[__vcpu_id]; \
(x) = _vcpu->evtchn_upcall_mask; \
_vcpu->evtchn_upcall_mask = 1; \
preempt_enable_no_resched(); \
({ int ___x; \
vcpu_info_t *_vcpu; \
preempt_disable(); \
- _vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()]; \
+ _vcpu = &HYPERVISOR_shared_info->vcpu_info[__vcpu_id]; \
___x = (_vcpu->evtchn_upcall_mask != 0); \
preempt_enable_no_resched(); \
___x; })
+++ /dev/null
-/*
- * arch/i386/mach-generic/io_ports.h
- *
- * Machine specific IO port address definition for generic.
- * Written by Osamu Tomita <tomita@cinet.co.jp>
- */
-#ifndef _MACH_IO_PORTS_H
-#define _MACH_IO_PORTS_H
-
-/* i8253A PIT registers */
-#define PIT_MODE 0x43
-#define PIT_CH0 0x40
-#define PIT_CH2 0x42
-
-/* i8259A PIC registers */
-#define PIC_MASTER_CMD 0x20
-#define PIC_MASTER_IMR 0x21
-#define PIC_MASTER_ISR PIC_MASTER_CMD
-#define PIC_MASTER_POLL PIC_MASTER_ISR
-#define PIC_MASTER_OCW3 PIC_MASTER_ISR
-#define PIC_SLAVE_CMD 0xa0
-#define PIC_SLAVE_IMR 0xa1
-
-/* i8259A PIC related value */
-#define PIC_CASCADE_IR 2
-#define MASTER_ICW4_DEFAULT 0x01
-#define SLAVE_ICW4_DEFAULT 0x01
-#define PIC_ICW4_AEOI 2
-
-#endif /* !_MACH_IO_PORTS_H */
/*
* GFP bitmasks..
*/
-/* Zone modifiers in GFP_ZONEMASK (see linux/mmzone.h - low two bits) */
-#define __GFP_DMA 0x01u
-#define __GFP_HIGHMEM 0x02u
+/* Zone modifiers in GFP_ZONEMASK (see linux/mmzone.h - low three bits) */
+#define __GFP_DMA ((__force gfp_t)0x01u)
+#define __GFP_HIGHMEM ((__force gfp_t)0x02u)
+#ifdef CONFIG_DMA_IS_DMA32
+#define __GFP_DMA32 ((__force gfp_t)0x01) /* ZONE_DMA is ZONE_DMA32 */
+#elif BITS_PER_LONG < 64
+#define __GFP_DMA32 ((__force gfp_t)0x00) /* ZONE_NORMAL is ZONE_DMA32 */
+#else
+#define __GFP_DMA32 ((__force gfp_t)0x04) /* Has own ZONE_DMA32 */
+#endif
/*
* Action modifiers - doesn't change the zoning
*
* __GFP_NORETRY: The VM implementation must not retry indefinitely.
*/
-#define __GFP_WAIT 0x10u /* Can wait and reschedule? */
-#define __GFP_HIGH 0x20u /* Should access emergency pools? */
-#define __GFP_IO 0x40u /* Can start physical IO? */
-#define __GFP_FS 0x80u /* Can call down to low-level FS? */
-#define __GFP_COLD 0x100u /* Cache-cold page required */
-#define __GFP_NOWARN 0x200u /* Suppress page allocation failure warning */
-#define __GFP_REPEAT 0x400u /* Retry the allocation. Might fail */
-#define __GFP_NOFAIL 0x800u /* Retry for ever. Cannot fail */
-#define __GFP_NORETRY 0x1000u /* Do not retry. Might fail */
-#define __GFP_NO_GROW 0x2000u /* Slab internal usage */
-#define __GFP_COMP 0x4000u /* Add compound page metadata */
-#define __GFP_ZERO 0x8000u /* Return zeroed page on success */
-#define __GFP_NOMEMALLOC 0x10000u /* Don't use emergency reserves */
-#define __GFP_NORECLAIM 0x20000u /* No realy zone reclaim during allocation */
-#define __GFP_HARDWALL 0x40000u /* Enforce hardwall cpuset memory allocs */
+#define __GFP_WAIT ((__force gfp_t)0x10u) /* Can wait and reschedule? */
+#define __GFP_HIGH ((__force gfp_t)0x20u) /* Should access emergency pools? */
+#define __GFP_IO ((__force gfp_t)0x40u) /* Can start physical IO? */
+#define __GFP_FS ((__force gfp_t)0x80u) /* Can call down to low-level FS? */
+#define __GFP_COLD ((__force gfp_t)0x100u) /* Cache-cold page required */
+#define __GFP_NOWARN ((__force gfp_t)0x200u) /* Suppress page allocation failure warning */
+#define __GFP_REPEAT ((__force gfp_t)0x400u) /* Retry the allocation. Might fail */
+#define __GFP_NOFAIL ((__force gfp_t)0x800u) /* Retry for ever. Cannot fail */
+#define __GFP_NORETRY ((__force gfp_t)0x1000u)/* Do not retry. Might fail */
+#define __GFP_NO_GROW ((__force gfp_t)0x2000u)/* Slab internal usage */
+#define __GFP_COMP ((__force gfp_t)0x4000u)/* Add compound page metadata */
+#define __GFP_ZERO ((__force gfp_t)0x8000u)/* Return zeroed page on success */
+#define __GFP_NOMEMALLOC ((__force gfp_t)0x10000u) /* Don't use emergency reserves */
+#define __GFP_HARDWALL ((__force gfp_t)0x20000u) /* Enforce hardwall cpuset memory allocs */
#define __GFP_BITS_SHIFT 20 /* Room for 20 __GFP_FOO bits */
-#define __GFP_BITS_MASK ((1 << __GFP_BITS_SHIFT) - 1)
+#define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
/* if you forget to add the bitmask here kernel will crash, period */
#define GFP_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS| \
__GFP_COLD|__GFP_NOWARN|__GFP_REPEAT| \
__GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP| \
- __GFP_NOMEMALLOC|__GFP_NORECLAIM|__GFP_HARDWALL)
+ __GFP_NOMEMALLOC|__GFP_HARDWALL)
#define GFP_ATOMIC (__GFP_HIGH)
#define GFP_NOIO (__GFP_WAIT)
#define GFP_DMA __GFP_DMA
+/* 4GB DMA on some platforms */
+#define GFP_DMA32 __GFP_DMA32
+
+
+static inline int gfp_zone(gfp_t gfp)
+{
+ int zone = GFP_ZONEMASK & (__force int) gfp;
+ BUG_ON(zone >= GFP_ZONETYPES);
+ return zone;
+}
/*
* There is only one page-allocator function, and two main namespaces to
return NULL;
return __alloc_pages(gfp_mask, order,
- NODE_DATA(nid)->node_zonelists + (gfp_mask & GFP_ZONEMASK));
+ NODE_DATA(nid)->node_zonelists + gfp_zone(gfp_mask));
}
#ifdef CONFIG_NUMA
*/
#include <linux/config.h>
+#include <linux/smp.h>
#if !defined(CONFIG_ARCH_S390)
#define VM_GROWSDOWN 0x00000100 /* general info on the segment */
#define VM_GROWSUP 0x00000200
-#define VM_SHM 0x00000400 /* shared memory area, don't swap out */
+#define VM_SHM 0x00000000 /* Means nothing: delete it later */
+#define VM_PFNMAP 0x00000400 /* Page-ranges managed without "struct page", just pure PFN */
#define VM_DENYWRITE 0x00000800 /* ETXTBSY on write attempts.. */
#define VM_EXECUTABLE 0x00001000
#define VM_DONTCOPY 0x00020000 /* Do not copy this vma on fork */
#define VM_DONTEXPAND 0x00040000 /* Cannot expand with mremap() */
-#define VM_RESERVED 0x00080000 /* Don't unmap it from swap_out */
+#define VM_RESERVED 0x00080000 /* Count as reserved_vm like IO */
#define VM_ACCOUNT 0x00100000 /* Is a VM accounted object */
#define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */
#define VM_NONLINEAR 0x00800000 /* Is non-linear (remap_file_pages) */
#define VM_MAPPED_COPY 0x01000000 /* T if mapped copy of data (nommu mmap) */
+#define VM_INSERTPAGE 0x02000000 /* The vma has had "vm_insert_page()" done on it */
#ifdef CONFIG_XEN
-#define VM_FOREIGN 0x02000000 /* Has pages belonging to another VM */
+#define VM_FOREIGN 0x04000000 /* Has pages belonging to another VM */
#endif
#ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */
struct mmu_gather;
struct inode;
-#ifdef ARCH_HAS_ATOMIC_UNSIGNED
-typedef unsigned page_flags_t;
-#else
-typedef unsigned long page_flags_t;
-#endif
-
/*
* Each physical page in the system has a struct page associated with
* it to keep track of whatever it is we are using the page for at the
* a page.
*/
struct page {
- page_flags_t flags; /* Atomic flags, some possibly
+ unsigned long flags; /* Atomic flags, some possibly
* updated asynchronously */
atomic_t _count; /* Usage count, see below. */
atomic_t _mapcount; /* Count of ptes mapped in mms,
* to show when page is mapped
* & limit reverse map searches.
*/
- unsigned long private; /* Mapping-private opaque data:
+ union {
+ unsigned long private; /* Mapping-private opaque data:
* usually used for buffer_heads
* if PagePrivate set; used for
* swp_entry_t if PageSwapCache
* When page is free, this indicates
* order in the buddy system.
*/
+#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
+ spinlock_t ptl;
+#endif
+ } u;
struct address_space *mapping; /* If low bit clear, points to
* inode address_space, or NULL.
* If page mapped as anonymous
#endif /* WANT_PAGE_VIRTUAL */
};
+#define page_private(page) ((page)->u.private)
+#define set_page_private(page, v) ((page)->u.private = (v))
+
/*
* FIXME: take this include out, include page-flags.h in
* files which need it (119 of them)
extern void FASTCALL(__page_cache_release(struct page *));
-#ifdef CONFIG_HUGETLB_PAGE
-
-static inline int page_count(struct page *p)
+static inline int page_count(struct page *page)
{
- if (PageCompound(p))
- p = (struct page *)p->private;
- return atomic_read(&(p)->_count) + 1;
+ if (PageCompound(page))
+ page = (struct page *)page_private(page);
+ return atomic_read(&page->_count) + 1;
}
static inline void get_page(struct page *page)
{
if (unlikely(PageCompound(page)))
- page = (struct page *)page->private;
+ page = (struct page *)page_private(page);
atomic_inc(&page->_count);
}
void put_page(struct page *page);
-#else /* CONFIG_HUGETLB_PAGE */
-
-#define page_count(p) (atomic_read(&(p)->_count) + 1)
-
-static inline void get_page(struct page *page)
-{
- atomic_inc(&page->_count);
-}
-
-static inline void put_page(struct page *page)
-{
- if (!PageReserved(page) && put_page_testzero(page))
- __page_cache_release(page);
-}
-
-#endif /* CONFIG_HUGETLB_PAGE */
-
/*
* Multiple processes may "see" the same page. E.g. for untouched
* mappings of /dev/null, all processes see the same page full of
#endif
/* Page flags: | [SECTION] | [NODE] | ZONE | ... | FLAGS | */
-#define SECTIONS_PGOFF ((sizeof(page_flags_t)*8) - SECTIONS_WIDTH)
+#define SECTIONS_PGOFF ((sizeof(unsigned long)*8) - SECTIONS_WIDTH)
#define NODES_PGOFF (SECTIONS_PGOFF - NODES_WIDTH)
#define ZONES_PGOFF (NODES_PGOFF - ZONES_WIDTH)
static inline pgoff_t page_index(struct page *page)
{
if (unlikely(PageSwapCache(page)))
- return page->private;
+ return page_private(page);
return page->index;
}
unsigned long truncate_count; /* Compare vm_truncate_count */
};
+struct page *vm_normal_page(struct vm_area_struct *, unsigned long, pte_t);
unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address,
unsigned long size, struct zap_details *);
-unsigned long unmap_vmas(struct mmu_gather **tlb, struct mm_struct *mm,
+unsigned long unmap_vmas(struct mmu_gather **tlb,
struct vm_area_struct *start_vma, unsigned long start_addr,
unsigned long end_addr, unsigned long *nr_accounted,
struct zap_details *);
}
extern int vmtruncate(struct inode * inode, loff_t offset);
-extern pud_t *FASTCALL(__pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address));
-extern pmd_t *FASTCALL(__pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address));
-extern pte_t *FASTCALL(pte_alloc_kernel(struct mm_struct *mm, pmd_t *pmd, unsigned long address));
-extern pte_t *FASTCALL(pte_alloc_map(struct mm_struct *mm, pmd_t *pmd, unsigned long address));
extern int install_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, struct page *page, pgprot_t prot);
extern int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, unsigned long pgoff, pgprot_t prot);
extern int __handle_mm_fault(struct mm_struct *mm,struct vm_area_struct *vma, unsigned long address, int write_access);
int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long start,
int len, int write, int force, struct page **pages, struct vm_area_struct **vmas);
+void print_bad_pte(struct vm_area_struct *, pte_t, unsigned long);
int __set_page_dirty_buffers(struct page *page);
int __set_page_dirty_nobuffers(struct page *page);
* The callback will be passed nr_to_scan == 0 when the VM is querying the
* cache size, so a fastpath for that case is appropriate.
*/
-typedef int (*shrinker_t)(int nr_to_scan, unsigned int gfp_mask);
+typedef int (*shrinker_t)(int nr_to_scan, gfp_t gfp_mask);
/*
* Add an aging callback. The int is the number of 'seeks' it takes
extern struct shrinker *set_shrinker(int, shrinker_t);
extern void remove_shrinker(struct shrinker *shrinker);
-/*
- * On a two-level or three-level page table, this ends up being trivial. Thus
- * the inlining and the symmetry break with pte_alloc_map() that does all
- * of this out-of-line.
- */
+extern pte_t *FASTCALL(get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl));
+
+int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address);
+int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address);
+int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address);
+int __pte_alloc_kernel(pmd_t *pmd, unsigned long address);
+
/*
* The following ifdef needed to get the 4level-fixup.h header to work.
* Remove it when 4level-fixup.h has been removed.
*/
-#ifdef CONFIG_MMU
-#ifndef __ARCH_HAS_4LEVEL_HACK
+#if defined(CONFIG_MMU) && !defined(__ARCH_HAS_4LEVEL_HACK)
static inline pud_t *pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
{
- if (pgd_none(*pgd))
- return __pud_alloc(mm, pgd, address);
- return pud_offset(pgd, address);
+ return (unlikely(pgd_none(*pgd)) && __pud_alloc(mm, pgd, address))?
+ NULL: pud_offset(pgd, address);
}
static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
{
- if (pud_none(*pud))
- return __pmd_alloc(mm, pud, address);
- return pmd_offset(pud, address);
+ return (unlikely(pud_none(*pud)) && __pmd_alloc(mm, pud, address))?
+ NULL: pmd_offset(pud, address);
}
-#endif
-#endif /* CONFIG_MMU */
+#endif /* CONFIG_MMU && !__ARCH_HAS_4LEVEL_HACK */
+
+#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
+/*
+ * We tuck a spinlock to guard each pagetable page into its struct page,
+ * at page->private, with BUILD_BUG_ON to make sure that this will not
+ * overflow into the next struct page (as it might with DEBUG_SPINLOCK).
+ * When freeing, reset page->mapping so free_pages_check won't complain.
+ */
+#define __pte_lockptr(page) &((page)->u.ptl)
+#define pte_lock_init(_page) do { \
+ spin_lock_init(__pte_lockptr(_page)); \
+} while (0)
+#define pte_lock_deinit(page) ((page)->mapping = NULL)
+#define pte_lockptr(mm, pmd) ({(void)(mm); __pte_lockptr(pmd_page(*(pmd)));})
+#else
+/*
+ * We use mm->page_table_lock to guard all pagetable pages of the mm.
+ */
+#define pte_lock_init(page) do {} while (0)
+#define pte_lock_deinit(page) do {} while (0)
+#define pte_lockptr(mm, pmd) ({(void)(pmd); &(mm)->page_table_lock;})
+#endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
+
+#define pte_offset_map_lock(mm, pmd, address, ptlp) \
+({ \
+ spinlock_t *__ptl = pte_lockptr(mm, pmd); \
+ pte_t *__pte = pte_offset_map(pmd, address); \
+ *(ptlp) = __ptl; \
+ spin_lock(__ptl); \
+ __pte; \
+})
+
+#define pte_unmap_unlock(pte, ptl) do { \
+ spin_unlock(ptl); \
+ pte_unmap(pte); \
+} while (0)
+
+#define pte_alloc_map(mm, pmd, address) \
+ ((unlikely(!pmd_present(*(pmd))) && __pte_alloc(mm, pmd, address))? \
+ NULL: pte_offset_map(pmd, address))
+
+#define pte_alloc_map_lock(mm, pmd, address, ptlp) \
+ ((unlikely(!pmd_present(*(pmd))) && __pte_alloc(mm, pmd, address))? \
+ NULL: pte_offset_map_lock(mm, pmd, address, ptlp))
+
+#define pte_alloc_kernel(pmd, address) \
+ ((unlikely(!pmd_present(*(pmd))) && __pte_alloc_kernel(pmd, address))? \
+ NULL: pte_offset_kernel(pmd, address))
extern void free_area_init(unsigned long * zones_size);
extern void free_area_init_node(int nid, pg_data_t *pgdat,
unsigned long * zones_size, unsigned long zone_start_pfn,
unsigned long *zholes_size);
extern void memmap_init_zone(unsigned long, int, unsigned long, unsigned long);
+extern void setup_per_zone_pages_min(void);
extern void mem_init(void);
extern void show_mem(void);
extern void si_meminfo(struct sysinfo * val);
extern int insert_vm_struct(struct mm_struct *, struct vm_area_struct *);
extern void __vma_link_rb(struct mm_struct *, struct vm_area_struct *,
struct rb_node **, struct rb_node *);
+extern void unlink_file_vma(struct vm_area_struct *);
extern struct vm_area_struct *copy_vma(struct vm_area_struct **,
unsigned long addr, unsigned long len, pgoff_t pgoff);
extern void exit_mmap(struct mm_struct *);
* turning readahead off */
int do_page_cache_readahead(struct address_space *mapping, struct file *filp,
- unsigned long offset, unsigned long nr_to_read);
+ pgoff_t offset, unsigned long nr_to_read);
int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
- unsigned long offset, unsigned long nr_to_read);
-unsigned long page_cache_readahead(struct address_space *mapping,
+ pgoff_t offset, unsigned long nr_to_read);
+unsigned long page_cache_readahead(struct address_space *mapping,
struct file_ra_state *ra,
struct file *filp,
- unsigned long offset,
+ pgoff_t offset,
unsigned long size);
void handle_ra_miss(struct address_space *mapping,
struct file_ra_state *ra, pgoff_t offset);
unsigned long max_sane_readahead(unsigned long nr);
/* Do stack extension */
-extern int expand_stack(struct vm_area_struct * vma, unsigned long address);
+extern int expand_stack(struct vm_area_struct *vma, unsigned long address);
+#ifdef CONFIG_IA64
+extern int expand_upwards(struct vm_area_struct *vma, unsigned long address);
+#endif
/* Look up the first VMA which satisfies addr < vm_end, NULL if none. */
extern struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long addr);
return (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
}
-extern struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr);
+struct vm_area_struct *find_extend_vma(struct mm_struct *, unsigned long addr);
+struct page *vmalloc_to_page(void *addr);
+unsigned long vmalloc_to_pfn(void *addr);
+int remap_pfn_range(struct vm_area_struct *, unsigned long addr,
+ unsigned long pfn, unsigned long size, pgprot_t);
+int vm_insert_page(struct vm_area_struct *, unsigned long addr, struct page *);
-extern struct page * vmalloc_to_page(void *addr);
-extern unsigned long vmalloc_to_pfn(void *addr);
-extern struct page * follow_page(struct mm_struct *mm, unsigned long address,
- int write);
-extern int check_user_page_readable(struct mm_struct *mm, unsigned long address);
-int remap_pfn_range(struct vm_area_struct *, unsigned long,
- unsigned long, unsigned long, pgprot_t);
+struct page *follow_page(struct vm_area_struct *, unsigned long address,
+ unsigned int foll_flags);
+#define FOLL_WRITE 0x01 /* check pte is writable */
+#define FOLL_TOUCH 0x02 /* mark page accessed */
+#define FOLL_GET 0x04 /* do get_page on page */
+#define FOLL_ANON 0x08 /* give ZERO_PAGE if no pgtable */
#ifdef CONFIG_XEN
typedef int (*pte_fn_t)(pte_t *pte, struct page *pte_page, unsigned long addr,
#endif
#ifdef CONFIG_PROC_FS
-void __vm_stat_account(struct mm_struct *, unsigned long, struct file *, long);
+void vm_stat_account(struct mm_struct *, unsigned long, struct file *, long);
#else
-static inline void __vm_stat_account(struct mm_struct *mm,
+static inline void vm_stat_account(struct mm_struct *mm,
unsigned long flags, struct file *file, long pages)
{
}
#endif /* CONFIG_PROC_FS */
-static inline void vm_stat_account(struct vm_area_struct *vma)
-{
- __vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file,
- vma_pages(vma));
-}
-
-static inline void vm_stat_unaccount(struct vm_area_struct *vma)
-{
- __vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file,
- -vma_pages(vma));
-}
-
-/* update per process rss and vm hiwater data */
-extern void update_mem_hiwater(struct task_struct *tsk);
-
#ifndef CONFIG_DEBUG_PAGEALLOC
static inline void
kernel_map_pages(struct page *page, int numpages, int enable)
unsigned int nr_frags;
unsigned short tso_size;
unsigned short tso_segs;
+ unsigned short ufo_size;
+ unsigned int ip6_frag_id;
struct sk_buff *frag_list;
skb_frag_t frags[MAX_SKB_FRAGS];
};
* struct sk_buff - socket buffer
* @next: Next buffer in list
* @prev: Previous buffer in list
- * @list: List we are on
* @sk: Socket we are owned by
* @tstamp: Time we arrived
* @dev: Device we arrived on/are leaving by
* @proto_csum_valid: Protocol csum validated since arriving at localhost
* @proto_csum_blank: Protocol csum must be added before leaving localhost
* @pkt_type: Packet class
+ * @fclone: skbuff clone status
* @ip_summed: Driver fed us an IP checksum
* @priority: Packet queueing priority
* @users: User count - see {datagram,tcp}.c
* @destructor: Destruct function
* @nfmark: Can be used for communication between hooks
* @nfct: Associated connection, if any
+ * @ipvs_property: skbuff is owned by ipvs
* @nfctinfo: Relationship of this skb to the connection
+ * @nfct_reasm: netfilter conntrack re-assembly pointer
* @nf_bridge: Saved data about a bridged frame - see br_netfilter.c
* @tc_index: Traffic control index
* @tc_verd: traffic control verdict
nohdr:1,
nfctinfo:3;
__u8 pkt_type:3,
+ fclone:2,
#ifndef CONFIG_XEN
- fclone:2;
+ ipvs_property:1;
#else
- fclone:2,
+ ipvs_property:1,
proto_csum_valid:1,
proto_csum_blank:1;
- /* 1 bit spare */
#endif
__be16 protocol;
#ifdef CONFIG_NETFILTER
__u32 nfmark;
struct nf_conntrack *nfct;
-#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
- __u8 ipvs_property:1;
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+ struct sk_buff *nfct_reasm;
#endif
#ifdef CONFIG_BRIDGE_NETFILTER
struct nf_bridge_info *nf_bridge;
extern void skb_under_panic(struct sk_buff *skb, int len,
void *here);
+extern int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
+ int getfrag(void *from, char *to, int offset,
+ int len,int odd, struct sk_buff *skb),
+ void *from, int length);
+
struct skb_seq_state
{
__u32 lower_offset;
*/
/**
- * __skb_queue_head - queue a buffer at the list head
+ * __skb_queue_after - queue a buffer at the list head
* @list: list to use
+ * @prev: place after this buffer
* @newsk: buffer to queue
*
- * Queue a buffer at the start of a list. This function takes no locks
+ * Queue a buffer int the middle of a list. This function takes no locks
* and you must therefore hold required locks before calling it.
*
* A buffer cannot be placed on two lists at the same time.
*/
-extern void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk);
-static inline void __skb_queue_head(struct sk_buff_head *list,
- struct sk_buff *newsk)
+static inline void __skb_queue_after(struct sk_buff_head *list,
+ struct sk_buff *prev,
+ struct sk_buff *newsk)
{
- struct sk_buff *prev, *next;
-
+ struct sk_buff *next;
list->qlen++;
- prev = (struct sk_buff *)list;
+
next = prev->next;
newsk->next = next;
newsk->prev = prev;
next->prev = prev->next = newsk;
}
+/**
+ * __skb_queue_head - queue a buffer at the list head
+ * @list: list to use
+ * @newsk: buffer to queue
+ *
+ * Queue a buffer at the start of a list. This function takes no locks
+ * and you must therefore hold required locks before calling it.
+ *
+ * A buffer cannot be placed on two lists at the same time.
+ */
+extern void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk);
+static inline void __skb_queue_head(struct sk_buff_head *list,
+ struct sk_buff *newsk)
+{
+ __skb_queue_after(list, (struct sk_buff *)list, newsk);
+}
+
/**
* __skb_queue_tail - queue a buffer at the list tail
* @list: list to use
prefetch(skb->next), (skb != (struct sk_buff *)(queue)); \
skb = skb->next)
+#define skb_queue_reverse_walk(queue, skb) \
+ for (skb = (queue)->prev; \
+ prefetch(skb->prev), (skb != (struct sk_buff *)(queue)); \
+ skb = skb->prev)
+
extern struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags,
int noblock, int *err);
extern int skb_copy_datagram_iovec(const struct sk_buff *from,
int offset, struct iovec *to,
int size);
-extern int skb_copy_and_csum_datagram_iovec(const
- struct sk_buff *skb,
+extern int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb,
int hlen,
struct iovec *iov);
extern void skb_free_datagram(struct sock *sk, struct sk_buff *skb);
extern void __net_timestamp(struct sk_buff *skb);
+extern unsigned int __skb_checksum_complete(struct sk_buff *skb);
+
+/**
+ * skb_checksum_complete - Calculate checksum of an entire packet
+ * @skb: packet to process
+ *
+ * This function calculates the checksum over the entire packet plus
+ * the value of skb->csum. The latter can be used to supply the
+ * checksum of a pseudo header as used by TCP/UDP. It returns the
+ * checksum.
+ *
+ * For protocols that contain complete checksums such as ICMP/TCP/UDP,
+ * this function can be used to verify that checksum on received
+ * packets. In that case the function should return zero if the
+ * checksum is correct. In particular, this function will return zero
+ * if skb->ip_summed is CHECKSUM_UNNECESSARY which indicates that the
+ * hardware has already verified the correctness of the checksum.
+ */
+static inline unsigned int skb_checksum_complete(struct sk_buff *skb)
+{
+ return skb->ip_summed != CHECKSUM_UNNECESSARY &&
+ __skb_checksum_complete(skb);
+}
+
#ifdef CONFIG_NETFILTER
static inline void nf_conntrack_put(struct nf_conntrack *nfct)
{
if (nfct)
atomic_inc(&nfct->use);
}
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+static inline void nf_conntrack_get_reasm(struct sk_buff *skb)
+{
+ if (skb)
+ atomic_inc(&skb->users);
+}
+static inline void nf_conntrack_put_reasm(struct sk_buff *skb)
+{
+ if (skb)
+ kfree_skb(skb);
+}
+#endif
static inline void nf_reset(struct sk_buff *skb)
{
nf_conntrack_put(skb->nfct);
skb->nfct = NULL;
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+ nf_conntrack_put_reasm(skb->nfct_reasm);
+ skb->nfct_reasm = NULL;
+#endif
}
#ifdef CONFIG_BRIDGE_NETFILTER
/**
* synchronize_irq - wait for pending IRQ handlers (on other CPUs)
+ * @irq: interrupt number to wait for
*
* This function waits for any pending IRQ handlers for this interrupt
* to complete before returning. If you use this function while
{
struct irq_desc *desc = irq_desc + irq;
+ if (irq >= NR_IRQS)
+ return;
+
while (desc->status & IRQ_INPROGRESS)
cpu_relax();
}
irq_desc_t *desc = irq_desc + irq;
unsigned long flags;
+ if (irq >= NR_IRQS)
+ return;
+
spin_lock_irqsave(&desc->lock, flags);
if (!desc->depth++) {
desc->status |= IRQ_DISABLED;
{
irq_desc_t *desc = irq_desc + irq;
+ if (irq >= NR_IRQS)
+ return;
+
disable_irq_nosync(irq);
if (desc->action)
synchronize_irq(irq);
irq_desc_t *desc = irq_desc + irq;
unsigned long flags;
+ if (irq >= NR_IRQS)
+ return;
+
spin_lock_irqsave(&desc->lock, flags);
switch (desc->depth) {
case 0:
unsigned long flags;
int shared = 0;
+ if (irq >= NR_IRQS)
+ return -EINVAL;
+
if (desc->handler == &no_irq_type)
return -ENOSYS;
/*
config DEBUG_BUGVERBOSE
bool "Verbose BUG() reporting (adds 70K)" if DEBUG_KERNEL && EMBEDDED
depends on BUG
- depends on ARM || ARM26 || M32R || M68K || SPARC32 || SPARC64 || (X86 && !X86_64) || FRV
+ depends on ARM || ARM26 || M32R || M68K || SPARC32 || SPARC64 || X86_32 || FRV
default !EMBEDDED
help
Say Y here to make BUG() panics output the file name and line number
If unsure, say N.
+config DEBUG_VM
+ bool "Debug VM"
+ depends on DEBUG_KERNEL
+ help
+ Enable this to debug the virtual-memory system.
+
+ If unsure, say N.
+
config FRAME_POINTER
bool "Compile the kernel with frame pointers"
depends on DEBUG_KERNEL && (X86 || CRIS || M68K || M68KNOMMU || FRV || UML)
default y if DEBUG_INFO && UML
help
If you say Y here the resulting kernel image will be slightly larger
- and slower, but it might give very useful debugging information
- on some architectures or you use external debuggers.
+ and slower, but it might give very useful debugging information on
+ some architectures or if you use external debuggers.
If you don't debug the kernel, you can say N.
+config RCU_TORTURE_TEST
+ tristate "torture tests for RCU"
+ depends on DEBUG_KERNEL
+ default n
+ help
+ This option provides a kernel module that runs torture tests
+ on the RCU infrastructure. The kernel module may be built
+ after the fact on the running kernel to be tested, if desired.
+
+ Say Y here if you want RCU torture tests to start automatically
+ at boot time (you probably don't).
+ Say M if you want the RCU torture tests to build as a module.
+ Say N if you are unsure.
--- /dev/null
+#
+# Makefile for some libs needed in the kernel.
+#
+
+lib-y := errno.o ctype.o string.o vsprintf.o cmdline.o \
+ bust_spinlocks.o rbtree.o radix-tree.o dump_stack.o \
+ idr.o div64.o int_sqrt.o bitmap.o extable.o prio_tree.o \
+ sha1.o
+
+lib-y += kobject.o kref.o kobject_uevent.o klist.o
+
+obj-y += sort.o parser.o halfmd4.o
+
+ifeq ($(CONFIG_DEBUG_KOBJECT),y)
+CFLAGS_kobject.o += -DDEBUG
+CFLAGS_kobject_uevent.o += -DDEBUG
+endif
+
+obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o
+lib-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
+lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
+lib-$(CONFIG_SEMAPHORE_SLEEPERS) += semaphore-sleepers.o
+lib-$(CONFIG_GENERIC_FIND_NEXT_BIT) += find_next_bit.o
+obj-$(CONFIG_LOCK_KERNEL) += kernel_lock.o
+obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o
+
+ifneq ($(CONFIG_HAVE_DEC_LOCK),y)
+ lib-y += dec_and_lock.o
+endif
+
+obj-$(CONFIG_CRC_CCITT) += crc-ccitt.o
+obj-$(CONFIG_CRC16) += crc16.o
+obj-$(CONFIG_CRC32) += crc32.o
+obj-$(CONFIG_LIBCRC32C) += libcrc32c.o
+obj-$(CONFIG_GENERIC_IOMAP) += iomap.o
+obj-$(CONFIG_GENERIC_ALLOCATOR) += genalloc.o
+
+obj-$(CONFIG_ZLIB_INFLATE) += zlib_inflate/
+obj-$(CONFIG_ZLIB_DEFLATE) += zlib_deflate/
+obj-$(CONFIG_REED_SOLOMON) += reed_solomon/
+
+obj-$(CONFIG_TEXTSEARCH) += textsearch.o
+obj-$(CONFIG_TEXTSEARCH_KMP) += ts_kmp.o
+obj-$(CONFIG_TEXTSEARCH_BM) += ts_bm.o
+obj-$(CONFIG_TEXTSEARCH_FSM) += ts_fsm.o
+
+ifneq ($(CONFIG_XEN),y)
+obj-$(CONFIG_SWIOTLB) += swiotlb.o
+endif
+
+hostprogs-y := gen_crc32table
+clean-files := crc32table.h
+
+$(obj)/crc32.o: $(obj)/crc32table.h
+
+quiet_cmd_crc32 = GEN $@
+ cmd_crc32 = $< > $@
+
+$(obj)/crc32table.h: $(obj)/gen_crc32table
+ $(call cmd,crc32)
--- /dev/null
+config SELECT_MEMORY_MODEL
+ def_bool y
+ depends on EXPERIMENTAL || ARCH_SELECT_MEMORY_MODEL
+
+choice
+ prompt "Memory model"
+ depends on SELECT_MEMORY_MODEL
+ default DISCONTIGMEM_MANUAL if ARCH_DISCONTIGMEM_DEFAULT
+ default SPARSEMEM_MANUAL if ARCH_SPARSEMEM_DEFAULT
+ default FLATMEM_MANUAL
+
+config FLATMEM_MANUAL
+ bool "Flat Memory"
+ depends on !ARCH_DISCONTIGMEM_ENABLE || ARCH_FLATMEM_ENABLE
+ help
+ This option allows you to change some of the ways that
+ Linux manages its memory internally. Most users will
+ only have one option here: FLATMEM. This is normal
+ and a correct option.
+
+ Some users of more advanced features like NUMA and
+ memory hotplug may have different options here.
+ DISCONTIGMEM is an more mature, better tested system,
+ but is incompatible with memory hotplug and may suffer
+ decreased performance over SPARSEMEM. If unsure between
+ "Sparse Memory" and "Discontiguous Memory", choose
+ "Discontiguous Memory".
+
+ If unsure, choose this option (Flat Memory) over any other.
+
+config DISCONTIGMEM_MANUAL
+ bool "Discontiguous Memory"
+ depends on ARCH_DISCONTIGMEM_ENABLE
+ help
+ This option provides enhanced support for discontiguous
+ memory systems, over FLATMEM. These systems have holes
+ in their physical address spaces, and this option provides
+ more efficient handling of these holes. However, the vast
+ majority of hardware has quite flat address spaces, and
+ can have degraded performance from extra overhead that
+ this option imposes.
+
+ Many NUMA configurations will have this as the only option.
+
+ If unsure, choose "Flat Memory" over this option.
+
+config SPARSEMEM_MANUAL
+ bool "Sparse Memory"
+ depends on ARCH_SPARSEMEM_ENABLE
+ help
+ This will be the only option for some systems, including
+ memory hotplug systems. This is normal.
+
+ For many other systems, this will be an alternative to
+ "Discontiguous Memory". This option provides some potential
+ performance benefits, along with decreased code complexity,
+ but it is newer, and more experimental.
+
+ If unsure, choose "Discontiguous Memory" or "Flat Memory"
+ over this option.
+
+endchoice
+
+config DISCONTIGMEM
+ def_bool y
+ depends on (!SELECT_MEMORY_MODEL && ARCH_DISCONTIGMEM_ENABLE) || DISCONTIGMEM_MANUAL
+
+config SPARSEMEM
+ def_bool y
+ depends on SPARSEMEM_MANUAL
+
+config FLATMEM
+ def_bool y
+ depends on (!DISCONTIGMEM && !SPARSEMEM) || FLATMEM_MANUAL
+
+config FLAT_NODE_MEM_MAP
+ def_bool y
+ depends on !SPARSEMEM
+
+#
+# Both the NUMA code and DISCONTIGMEM use arrays of pg_data_t's
+# to represent different areas of memory. This variable allows
+# those dependencies to exist individually.
+#
+config NEED_MULTIPLE_NODES
+ def_bool y
+ depends on DISCONTIGMEM || NUMA
+
+config HAVE_MEMORY_PRESENT
+ def_bool y
+ depends on ARCH_HAVE_MEMORY_PRESENT || SPARSEMEM
+
+#
+# SPARSEMEM_EXTREME (which is the default) does some bootmem
+# allocations when memory_present() is called. If this can not
+# be done on your architecture, select this option. However,
+# statically allocating the mem_section[] array can potentially
+# consume vast quantities of .bss, so be careful.
+#
+# This option will also potentially produce smaller runtime code
+# with gcc 3.4 and later.
+#
+config SPARSEMEM_STATIC
+ def_bool n
+
+#
+# Architectecture platforms which require a two level mem_section in SPARSEMEM
+# must select this option. This is usually for architecture platforms with
+# an extremely sparse physical address space.
+#
+config SPARSEMEM_EXTREME
+ def_bool y
+ depends on SPARSEMEM && !SPARSEMEM_STATIC
+
+# eventually, we can have this option just 'select SPARSEMEM'
+config MEMORY_HOTPLUG
+ bool "Allow for memory hot-add"
+ depends on SPARSEMEM && HOTPLUG && !SOFTWARE_SUSPEND
+
+comment "Memory hotplug is currently incompatible with Software Suspend"
+ depends on SPARSEMEM && HOTPLUG && SOFTWARE_SUSPEND
+
+# Heavily threaded applications may benefit from splitting the mm-wide
+# page_table_lock, so that faults on different parts of the user address
+# space can be handled with less contention: split it at this NR_CPUS.
+# Default to 4 for wider testing, though 8 might be more appropriate.
+# ARM's adjust_pte (unused if VIPT) depends on mm-wide page_table_lock.
+# PA-RISC 7xxx's spinlock_t would enlarge struct page from 32 to 44 bytes.
+# XEN uses the mapping field on pagetable pages to store a pointer to
+# the destructor.
+#
+config SPLIT_PTLOCK_CPUS
+ int
+ default "4096" if ARM && !CPU_CACHE_VIPT
+ default "4096" if PARISC && !PA20
+ default "4096" if XEN
+ default "4"
static mempool_t *page_pool, *isa_page_pool;
-static void *page_pool_alloc(gfp_t gfp_mask, void *data)
+static void *page_pool_alloc_isa(gfp_t gfp_mask, void *data)
{
- unsigned int gfp = gfp_mask | (unsigned int) (long) data;
-
- return alloc_page(gfp);
+ return alloc_page(gfp_mask | GFP_DMA);
}
static void page_pool_free(void *page, void *data)
* n means that there are (n-1) current users of it.
*/
#ifdef CONFIG_HIGHMEM
+
+static void *page_pool_alloc(gfp_t gfp_mask, void *data)
+{
+ return alloc_page(gfp_mask);
+}
+
static int pkmap_count[LAST_PKMAP];
static unsigned int last_pkmap_nr;
static __cacheline_aligned_in_smp DEFINE_SPINLOCK(kmap_lock);
if (isa_page_pool)
return 0;
- isa_page_pool = mempool_create(ISA_POOL_SIZE, page_pool_alloc, page_pool_free, (void *) __GFP_DMA);
+ isa_page_pool = mempool_create(ISA_POOL_SIZE, page_pool_alloc_isa, page_pool_free, NULL);
if (!isa_page_pool)
BUG();
{
struct page *page = pmd_page(*pmd);
pmd_clear(pmd);
+ pte_lock_deinit(page);
pte_free_tlb(tlb, page);
dec_page_state(nr_page_table_pages);
tlb->mm->nr_ptes--;
free_pud_range(*tlb, pgd, addr, next, floor, ceiling);
} while (pgd++, addr = next, addr != end);
- if (!tlb_is_full_mm(*tlb))
+ if (!(*tlb)->fullmm)
flush_tlb_pgtables((*tlb)->mm, start, end);
}
struct vm_area_struct *next = vma->vm_next;
unsigned long addr = vma->vm_start;
+ /*
+ * Hide vma from rmap and vmtruncate before freeing pgtables
+ */
+ anon_vma_unlink(vma);
+ unlink_file_vma(vma);
+
if (is_hugepage_only_range(vma->vm_mm, addr, HPAGE_SIZE)) {
hugetlb_free_pgd_range(tlb, addr, vma->vm_end,
floor, next? next->vm_start: ceiling);
HPAGE_SIZE)) {
vma = next;
next = vma->vm_next;
+ anon_vma_unlink(vma);
+ unlink_file_vma(vma);
}
free_pgd_range(tlb, addr, vma->vm_end,
floor, next? next->vm_start: ceiling);
}
}
-pte_t fastcall *pte_alloc_map(struct mm_struct *mm, pmd_t *pmd,
- unsigned long address)
+int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
{
- if (!pmd_present(*pmd)) {
- struct page *new;
+ struct page *new = pte_alloc_one(mm, address);
+ if (!new)
+ return -ENOMEM;
- spin_unlock(&mm->page_table_lock);
- new = pte_alloc_one(mm, address);
- spin_lock(&mm->page_table_lock);
- if (!new)
- return NULL;
- /*
- * Because we dropped the lock, we should re-check the
- * entry, as somebody else could have populated it..
- */
- if (pmd_present(*pmd)) {
- pte_free(new);
- goto out;
- }
+ pte_lock_init(new);
+ spin_lock(&mm->page_table_lock);
+ if (pmd_present(*pmd)) { /* Another has populated it */
+ pte_lock_deinit(new);
+ pte_free(new);
+ } else {
mm->nr_ptes++;
inc_page_state(nr_page_table_pages);
pmd_populate(mm, pmd, new);
}
-out:
- return pte_offset_map(pmd, address);
+ spin_unlock(&mm->page_table_lock);
+ return 0;
+}
+
+int __pte_alloc_kernel(pmd_t *pmd, unsigned long address)
+{
+ pte_t *new = pte_alloc_one_kernel(&init_mm, address);
+ if (!new)
+ return -ENOMEM;
+
+ spin_lock(&init_mm.page_table_lock);
+ if (pmd_present(*pmd)) /* Another has populated it */
+ pte_free_kernel(new);
+ else
+ pmd_populate_kernel(&init_mm, pmd, new);
+ spin_unlock(&init_mm.page_table_lock);
+ return 0;
+}
+
+static inline void add_mm_rss(struct mm_struct *mm, int file_rss, int anon_rss)
+{
+ if (file_rss)
+ add_mm_counter(mm, file_rss, file_rss);
+ if (anon_rss)
+ add_mm_counter(mm, anon_rss, anon_rss);
}
-pte_t fastcall * pte_alloc_kernel(struct mm_struct *mm, pmd_t *pmd, unsigned long address)
+/*
+ * This function is called to print an error when a bad pte
+ * is found. For example, we might have a PFN-mapped pte in
+ * a region that doesn't allow it.
+ *
+ * The calling function must still handle the error.
+ */
+void print_bad_pte(struct vm_area_struct *vma, pte_t pte, unsigned long vaddr)
+{
+ printk(KERN_ERR "Bad pte = %08llx, process = %s, "
+ "vm_flags = %lx, vaddr = %lx\n",
+ (long long)pte_val(pte),
+ (vma->vm_mm == current->mm ? current->comm : "???"),
+ vma->vm_flags, vaddr);
+ dump_stack();
+}
+
+static inline int is_cow_mapping(unsigned int flags)
+{
+ return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
+}
+
+/*
+ * This function gets the "struct page" associated with a pte.
+ *
+ * NOTE! Some mappings do not have "struct pages". A raw PFN mapping
+ * will have each page table entry just pointing to a raw page frame
+ * number, and as far as the VM layer is concerned, those do not have
+ * pages associated with them - even if the PFN might point to memory
+ * that otherwise is perfectly fine and has a "struct page".
+ *
+ * The way we recognize those mappings is through the rules set up
+ * by "remap_pfn_range()": the vma will have the VM_PFNMAP bit set,
+ * and the vm_pgoff will point to the first PFN mapped: thus every
+ * page that is a raw mapping will always honor the rule
+ *
+ * pfn_of_page == vma->vm_pgoff + ((addr - vma->vm_start) >> PAGE_SHIFT)
+ *
+ * and if that isn't true, the page has been COW'ed (in which case it
+ * _does_ have a "struct page" associated with it even if it is in a
+ * VM_PFNMAP range).
+ */
+struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, pte_t pte)
{
- if (!pmd_present(*pmd)) {
- pte_t *new;
+ unsigned long pfn = pte_pfn(pte);
- spin_unlock(&mm->page_table_lock);
- new = pte_alloc_one_kernel(mm, address);
- spin_lock(&mm->page_table_lock);
- if (!new)
+ if (vma->vm_flags & VM_PFNMAP) {
+ unsigned long off = (addr - vma->vm_start) >> PAGE_SHIFT;
+ if (pfn == vma->vm_pgoff + off)
+ return NULL;
+ if (!is_cow_mapping(vma->vm_flags))
return NULL;
+ }
- /*
- * Because we dropped the lock, we should re-check the
- * entry, as somebody else could have populated it..
- */
- if (pmd_present(*pmd)) {
- pte_free_kernel(new);
- goto out;
- }
- pmd_populate_kernel(mm, pmd, new);
+ /*
+ * Add some anal sanity checks for now. Eventually,
+ * we should just do "return pfn_to_page(pfn)", but
+ * in the meantime we check that we get a valid pfn,
+ * and that the resulting page looks ok.
+ *
+ * Remove this test eventually!
+ */
+ if (unlikely(!pfn_valid(pfn))) {
+ if (!vma->vm_flags & VM_RESERVED)
+ print_bad_pte(vma, pte, addr);
+ return NULL;
}
-out:
- return pte_offset_kernel(pmd, address);
+
+ /*
+ * NOTE! We still have PageReserved() pages in the page
+ * tables.
+ *
+ * The PAGE_ZERO() pages and various VDSO mappings can
+ * cause them to exist.
+ */
+ return pfn_to_page(pfn);
}
/*
* copy one vm_area from one task to the other. Assumes the page tables
* already present in the new task to be cleared in the whole range
* covered by this vma.
- *
- * dst->page_table_lock is held on entry and exit,
- * but may be dropped within p[mg]d_alloc() and pte_alloc_map().
*/
static inline void
copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
- pte_t *dst_pte, pte_t *src_pte, unsigned long vm_flags,
- unsigned long addr)
+ pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
+ unsigned long addr, int *rss)
{
+ unsigned long vm_flags = vma->vm_flags;
pte_t pte = *src_pte;
struct page *page;
- unsigned long pfn;
/* pte contains position in swap or file, so copy. */
if (unlikely(!pte_present(pte))) {
/* make sure dst_mm is on swapoff's mmlist. */
if (unlikely(list_empty(&dst_mm->mmlist))) {
spin_lock(&mmlist_lock);
- list_add(&dst_mm->mmlist, &src_mm->mmlist);
+ if (list_empty(&dst_mm->mmlist))
+ list_add(&dst_mm->mmlist,
+ &src_mm->mmlist);
spin_unlock(&mmlist_lock);
}
}
- set_pte_at(dst_mm, addr, dst_pte, pte);
- return;
- }
-
- pfn = pte_pfn(pte);
- /* the pte points outside of valid memory, the
- * mapping is assumed to be good, meaningful
- * and not mapped via rmap - duplicate the
- * mapping as is.
- */
- page = NULL;
- if (pfn_valid(pfn))
- page = pfn_to_page(pfn);
-
- if (!page || PageReserved(page)) {
- set_pte_at(dst_mm, addr, dst_pte, pte);
- return;
+ goto out_set_pte;
}
/*
* If it's a COW mapping, write protect it both
* in the parent and the child
*/
- if ((vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE) {
+ if (is_cow_mapping(vm_flags)) {
ptep_set_wrprotect(src_mm, addr, src_pte);
pte = *src_pte;
}
if (vm_flags & VM_SHARED)
pte = pte_mkclean(pte);
pte = pte_mkold(pte);
- get_page(page);
- inc_mm_counter(dst_mm, rss);
- if (PageAnon(page))
- inc_mm_counter(dst_mm, anon_rss);
+
+ page = vm_normal_page(vma, addr, pte);
+ if (page) {
+ get_page(page);
+ page_dup_rmap(page);
+ rss[!!PageAnon(page)]++;
+ }
+
+out_set_pte:
set_pte_at(dst_mm, addr, dst_pte, pte);
- page_dup_rmap(page);
}
static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
unsigned long addr, unsigned long end)
{
pte_t *src_pte, *dst_pte;
- unsigned long vm_flags = vma->vm_flags;
- int progress;
+ spinlock_t *src_ptl, *dst_ptl;
+ int progress = 0;
+ int rss[2];
again:
- dst_pte = pte_alloc_map(dst_mm, dst_pmd, addr);
+ rss[1] = rss[0] = 0;
+ dst_pte = pte_alloc_map_lock(dst_mm, dst_pmd, addr, &dst_ptl);
if (!dst_pte)
return -ENOMEM;
src_pte = pte_offset_map_nested(src_pmd, addr);
+ src_ptl = pte_lockptr(src_mm, src_pmd);
+ spin_lock(src_ptl);
- progress = 0;
- spin_lock(&src_mm->page_table_lock);
do {
/*
* We are holding two locks at this point - either of them
* could generate latencies in another task on another CPU.
*/
- if (progress >= 32 && (need_resched() ||
- need_lockbreak(&src_mm->page_table_lock) ||
- need_lockbreak(&dst_mm->page_table_lock)))
- break;
+ if (progress >= 32) {
+ progress = 0;
+ if (need_resched() ||
+ need_lockbreak(src_ptl) ||
+ need_lockbreak(dst_ptl))
+ break;
+ }
if (pte_none(*src_pte)) {
progress++;
continue;
}
- copy_one_pte(dst_mm, src_mm, dst_pte, src_pte, vm_flags, addr);
+ copy_one_pte(dst_mm, src_mm, dst_pte, src_pte, vma, addr, rss);
progress += 8;
} while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end);
- spin_unlock(&src_mm->page_table_lock);
+ spin_unlock(src_ptl);
pte_unmap_nested(src_pte - 1);
- pte_unmap(dst_pte - 1);
- cond_resched_lock(&dst_mm->page_table_lock);
+ add_mm_rss(dst_mm, rss[0], rss[1]);
+ pte_unmap_unlock(dst_pte - 1, dst_ptl);
+ cond_resched();
if (addr != end)
goto again;
return 0;
* readonly mappings. The tradeoff is that copy_page_range is more
* efficient than faulting.
*/
- if (!(vma->vm_flags & (VM_HUGETLB|VM_NONLINEAR|VM_RESERVED))) {
+ if (!(vma->vm_flags & (VM_HUGETLB|VM_NONLINEAR|VM_PFNMAP|VM_INSERTPAGE))) {
if (!vma->anon_vma)
return 0;
}
return 0;
}
-static void zap_pte_range(struct mmu_gather *tlb, pmd_t *pmd,
+static unsigned long zap_pte_range(struct mmu_gather *tlb,
+ struct vm_area_struct *vma, pmd_t *pmd,
unsigned long addr, unsigned long end,
- struct zap_details *details)
+ long *zap_work, struct zap_details *details)
{
+ struct mm_struct *mm = tlb->mm;
pte_t *pte;
+ spinlock_t *ptl;
+ int file_rss = 0;
+ int anon_rss = 0;
- pte = pte_offset_map(pmd, addr);
+ pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
do {
pte_t ptent = *pte;
- if (pte_none(ptent))
+ if (pte_none(ptent)) {
+ (*zap_work)--;
continue;
+ }
if (pte_present(ptent)) {
- struct page *page = NULL;
- unsigned long pfn = pte_pfn(ptent);
- if (pfn_valid(pfn)) {
- page = pfn_to_page(pfn);
- if (PageReserved(page))
- page = NULL;
- }
+ struct page *page;
+
+ (*zap_work) -= PAGE_SIZE;
+
+ page = vm_normal_page(vma, addr, ptent);
if (unlikely(details) && page) {
/*
* unmap_shared_mapping_pages() wants to
page->index > details->last_index))
continue;
}
- ptent = ptep_get_and_clear_full(tlb->mm, addr, pte,
+ ptent = ptep_get_and_clear_full(mm, addr, pte,
tlb->fullmm);
tlb_remove_tlb_entry(tlb, pte, addr);
if (unlikely(!page))
if (unlikely(details) && details->nonlinear_vma
&& linear_page_index(details->nonlinear_vma,
addr) != page->index)
- set_pte_at(tlb->mm, addr, pte,
+ set_pte_at(mm, addr, pte,
pgoff_to_pte(page->index));
- if (pte_dirty(ptent))
- set_page_dirty(page);
if (PageAnon(page))
- dec_mm_counter(tlb->mm, anon_rss);
- else if (pte_young(ptent))
- mark_page_accessed(page);
- tlb->freed++;
+ anon_rss--;
+ else {
+ if (pte_dirty(ptent))
+ set_page_dirty(page);
+ if (pte_young(ptent))
+ mark_page_accessed(page);
+ file_rss--;
+ }
page_remove_rmap(page);
tlb_remove_page(tlb, page);
continue;
continue;
if (!pte_file(ptent))
free_swap_and_cache(pte_to_swp_entry(ptent));
- pte_clear_full(tlb->mm, addr, pte, tlb->fullmm);
- } while (pte++, addr += PAGE_SIZE, addr != end);
- pte_unmap(pte - 1);
+ pte_clear_full(mm, addr, pte, tlb->fullmm);
+ } while (pte++, addr += PAGE_SIZE, (addr != end && *zap_work > 0));
+
+ add_mm_rss(mm, file_rss, anon_rss);
+ pte_unmap_unlock(pte - 1, ptl);
+
+ return addr;
}
-static inline void zap_pmd_range(struct mmu_gather *tlb, pud_t *pud,
+static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
+ struct vm_area_struct *vma, pud_t *pud,
unsigned long addr, unsigned long end,
- struct zap_details *details)
+ long *zap_work, struct zap_details *details)
{
pmd_t *pmd;
unsigned long next;
pmd = pmd_offset(pud, addr);
do {
next = pmd_addr_end(addr, end);
- if (pmd_none_or_clear_bad(pmd))
+ if (pmd_none_or_clear_bad(pmd)) {
+ (*zap_work)--;
continue;
- zap_pte_range(tlb, pmd, addr, next, details);
- } while (pmd++, addr = next, addr != end);
+ }
+ next = zap_pte_range(tlb, vma, pmd, addr, next,
+ zap_work, details);
+ } while (pmd++, addr = next, (addr != end && *zap_work > 0));
+
+ return addr;
}
-static inline void zap_pud_range(struct mmu_gather *tlb, pgd_t *pgd,
+static inline unsigned long zap_pud_range(struct mmu_gather *tlb,
+ struct vm_area_struct *vma, pgd_t *pgd,
unsigned long addr, unsigned long end,
- struct zap_details *details)
+ long *zap_work, struct zap_details *details)
{
pud_t *pud;
unsigned long next;
pud = pud_offset(pgd, addr);
do {
next = pud_addr_end(addr, end);
- if (pud_none_or_clear_bad(pud))
+ if (pud_none_or_clear_bad(pud)) {
+ (*zap_work)--;
continue;
- zap_pmd_range(tlb, pud, addr, next, details);
- } while (pud++, addr = next, addr != end);
+ }
+ next = zap_pmd_range(tlb, vma, pud, addr, next,
+ zap_work, details);
+ } while (pud++, addr = next, (addr != end && *zap_work > 0));
+
+ return addr;
}
-static void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma,
+static unsigned long unmap_page_range(struct mmu_gather *tlb,
+ struct vm_area_struct *vma,
unsigned long addr, unsigned long end,
- struct zap_details *details)
+ long *zap_work, struct zap_details *details)
{
pgd_t *pgd;
unsigned long next;
pgd = pgd_offset(vma->vm_mm, addr);
do {
next = pgd_addr_end(addr, end);
- if (pgd_none_or_clear_bad(pgd))
+ if (pgd_none_or_clear_bad(pgd)) {
+ (*zap_work)--;
continue;
- zap_pud_range(tlb, pgd, addr, next, details);
- } while (pgd++, addr = next, addr != end);
+ }
+ next = zap_pud_range(tlb, vma, pgd, addr, next,
+ zap_work, details);
+ } while (pgd++, addr = next, (addr != end && *zap_work > 0));
tlb_end_vma(tlb, vma);
+
+ return addr;
}
#ifdef CONFIG_PREEMPT
/**
* unmap_vmas - unmap a range of memory covered by a list of vma's
* @tlbp: address of the caller's struct mmu_gather
- * @mm: the controlling mm_struct
* @vma: the starting vma
* @start_addr: virtual address at which to start unmapping
* @end_addr: virtual address at which to end unmapping
*
* Returns the end address of the unmapping (restart addr if interrupted).
*
- * Unmap all pages in the vma list. Called under page_table_lock.
+ * Unmap all pages in the vma list.
*
- * We aim to not hold page_table_lock for too long (for scheduling latency
- * reasons). So zap pages in ZAP_BLOCK_SIZE bytecounts. This means we need to
+ * We aim to not hold locks for too long (for scheduling latency reasons).
+ * So zap pages in ZAP_BLOCK_SIZE bytecounts. This means we need to
* return the ending mmu_gather to the caller.
*
* Only addresses between `start' and `end' will be unmapped.
* ensure that any thus-far unmapped pages are flushed before unmap_vmas()
* drops the lock and schedules.
*/
-unsigned long unmap_vmas(struct mmu_gather **tlbp, struct mm_struct *mm,
+unsigned long unmap_vmas(struct mmu_gather **tlbp,
struct vm_area_struct *vma, unsigned long start_addr,
unsigned long end_addr, unsigned long *nr_accounted,
struct zap_details *details)
{
- unsigned long zap_bytes = ZAP_BLOCK_SIZE;
+ long zap_work = ZAP_BLOCK_SIZE;
unsigned long tlb_start = 0; /* For tlb_finish_mmu */
int tlb_start_valid = 0;
unsigned long start = start_addr;
spinlock_t *i_mmap_lock = details? details->i_mmap_lock: NULL;
- int fullmm = tlb_is_full_mm(*tlbp);
+ int fullmm = (*tlbp)->fullmm;
for ( ; vma && vma->vm_start < end_addr; vma = vma->vm_next) {
unsigned long end;
*nr_accounted += (end - start) >> PAGE_SHIFT;
while (start != end) {
- unsigned long block;
-
if (!tlb_start_valid) {
tlb_start = start;
tlb_start_valid = 1;
}
- if (is_vm_hugetlb_page(vma)) {
- block = end - start;
+ if (unlikely(is_vm_hugetlb_page(vma))) {
unmap_hugepage_range(vma, start, end);
- } else {
- block = min(zap_bytes, end - start);
- unmap_page_range(*tlbp, vma, start,
- start + block, details);
+ zap_work -= (end - start) /
+ (HPAGE_SIZE / PAGE_SIZE);
+ start = end;
+ } else
+ start = unmap_page_range(*tlbp, vma,
+ start, end, &zap_work, details);
+
+ if (zap_work > 0) {
+ BUG_ON(start != end);
+ break;
}
- start += block;
- zap_bytes -= block;
- if ((long)zap_bytes > 0)
- continue;
-
tlb_finish_mmu(*tlbp, tlb_start, start);
if (need_resched() ||
- need_lockbreak(&mm->page_table_lock) ||
(i_mmap_lock && need_lockbreak(i_mmap_lock))) {
if (i_mmap_lock) {
- /* must reset count of rss freed */
- *tlbp = tlb_gather_mmu(mm, fullmm);
+ *tlbp = NULL;
goto out;
}
- spin_unlock(&mm->page_table_lock);
cond_resched();
- spin_lock(&mm->page_table_lock);
}
- *tlbp = tlb_gather_mmu(mm, fullmm);
+ *tlbp = tlb_gather_mmu(vma->vm_mm, fullmm);
tlb_start_valid = 0;
- zap_bytes = ZAP_BLOCK_SIZE;
+ zap_work = ZAP_BLOCK_SIZE;
}
}
out:
unsigned long end = address + size;
unsigned long nr_accounted = 0;
- if (is_vm_hugetlb_page(vma)) {
- zap_hugepage_range(vma, address, size);
- return end;
- }
-
lru_add_drain();
- spin_lock(&mm->page_table_lock);
tlb = tlb_gather_mmu(mm, 0);
- end = unmap_vmas(&tlb, mm, vma, address, end, &nr_accounted, details);
- tlb_finish_mmu(tlb, address, end);
- spin_unlock(&mm->page_table_lock);
+ update_hiwater_rss(mm);
+ end = unmap_vmas(&tlb, vma, address, end, &nr_accounted, details);
+ if (tlb)
+ tlb_finish_mmu(tlb, address, end);
return end;
}
/*
* Do a quick page-table lookup for a single page.
- * mm->page_table_lock must be held.
*/
-static struct page *__follow_page(struct mm_struct *mm, unsigned long address,
- int read, int write, int accessed)
+struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
+ unsigned int flags)
{
pgd_t *pgd;
pud_t *pud;
pmd_t *pmd;
pte_t *ptep, pte;
- unsigned long pfn;
+ spinlock_t *ptl;
struct page *page;
+ struct mm_struct *mm = vma->vm_mm;
- page = follow_huge_addr(mm, address, write);
- if (! IS_ERR(page))
- return page;
+ page = follow_huge_addr(mm, address, flags & FOLL_WRITE);
+ if (!IS_ERR(page)) {
+ BUG_ON(flags & FOLL_GET);
+ goto out;
+ }
+ page = NULL;
pgd = pgd_offset(mm, address);
if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
- goto out;
+ goto no_page_table;
pud = pud_offset(pgd, address);
if (pud_none(*pud) || unlikely(pud_bad(*pud)))
- goto out;
+ goto no_page_table;
pmd = pmd_offset(pud, address);
if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
+ goto no_page_table;
+
+ if (pmd_huge(*pmd)) {
+ BUG_ON(flags & FOLL_GET);
+ page = follow_huge_pmd(mm, address, pmd, flags & FOLL_WRITE);
goto out;
- if (pmd_huge(*pmd))
- return follow_huge_pmd(mm, address, pmd, write);
+ }
- ptep = pte_offset_map(pmd, address);
+ ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
if (!ptep)
goto out;
pte = *ptep;
- pte_unmap(ptep);
- if (pte_present(pte)) {
- if (write && !pte_write(pte))
- goto out;
- if (read && !pte_read(pte))
- goto out;
- pfn = pte_pfn(pte);
- if (pfn_valid(pfn)) {
- page = pfn_to_page(pfn);
- if (accessed) {
- if (write && !pte_dirty(pte) &&!PageDirty(page))
- set_page_dirty(page);
- mark_page_accessed(page);
- }
- return page;
- }
+ if (!pte_present(pte))
+ goto unlock;
+ if ((flags & FOLL_WRITE) && !pte_write(pte))
+ goto unlock;
+ page = vm_normal_page(vma, address, pte);
+ if (unlikely(!page))
+ goto unlock;
+
+ if (flags & FOLL_GET)
+ get_page(page);
+ if (flags & FOLL_TOUCH) {
+ if ((flags & FOLL_WRITE) &&
+ !pte_dirty(pte) && !PageDirty(page))
+ set_page_dirty(page);
+ mark_page_accessed(page);
}
-
+unlock:
+ pte_unmap_unlock(ptep, ptl);
out:
- return NULL;
-}
-
-inline struct page *
-follow_page(struct mm_struct *mm, unsigned long address, int write)
-{
- return __follow_page(mm, address, 0, write, 1);
-}
-
-/*
- * check_user_page_readable() can be called frm niterrupt context by oprofile,
- * so we need to avoid taking any non-irq-safe locks
- */
-int check_user_page_readable(struct mm_struct *mm, unsigned long address)
-{
- return __follow_page(mm, address, 1, 0, 0) != NULL;
-}
-EXPORT_SYMBOL(check_user_page_readable);
-
-static inline int
-untouched_anonymous_page(struct mm_struct* mm, struct vm_area_struct *vma,
- unsigned long address)
-{
- pgd_t *pgd;
- pud_t *pud;
- pmd_t *pmd;
-
- /* Check if the vma is for an anonymous mapping. */
- if (vma->vm_ops && vma->vm_ops->nopage)
- return 0;
-
- /* Check if page directory entry exists. */
- pgd = pgd_offset(mm, address);
- if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
- return 1;
-
- pud = pud_offset(pgd, address);
- if (pud_none(*pud) || unlikely(pud_bad(*pud)))
- return 1;
-
- /* Check if page middle directory entry exists. */
- pmd = pmd_offset(pud, address);
- if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
- return 1;
+ return page;
- /* There is a pte slot for 'address' in 'mm'. */
- return 0;
+no_page_table:
+ /*
+ * When core dumping an enormous anonymous area that nobody
+ * has touched so far, we don't want to allocate page tables.
+ */
+ if (flags & FOLL_ANON) {
+ page = ZERO_PAGE(address);
+ if (flags & FOLL_GET)
+ get_page(page);
+ BUG_ON(flags & FOLL_WRITE);
+ }
+ return page;
}
int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
struct page **pages, struct vm_area_struct **vmas)
{
int i;
- unsigned int flags;
+ unsigned int vm_flags;
/*
* Require read or write permissions.
* If 'force' is set, we only require the "MAY" flags.
*/
- flags = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
- flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
+ vm_flags = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
+ vm_flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
i = 0;
do {
- struct vm_area_struct * vma;
+ struct vm_area_struct *vma;
+ unsigned int foll_flags;
vma = find_extend_vma(mm, start);
if (!vma && in_gate_area(tsk, start)) {
return i ? : -EFAULT;
}
if (pages) {
- pages[i] = pte_page(*pte);
- get_page(pages[i]);
+ struct page *page = vm_normal_page(gate_vma, start, *pte);
+ pages[i] = page;
+ if (page)
+ get_page(page);
}
pte_unmap(pte);
if (vmas)
}
}
#endif
- if (!vma || (vma->vm_flags & VM_IO)
- || !(flags & vma->vm_flags))
+ if (!vma || (vma->vm_flags & (VM_IO | VM_PFNMAP))
+ || !(vm_flags & vma->vm_flags))
return i ? : -EFAULT;
if (is_vm_hugetlb_page(vma)) {
&start, &len, i);
continue;
}
- spin_lock(&mm->page_table_lock);
+
+ foll_flags = FOLL_TOUCH;
+ if (pages)
+ foll_flags |= FOLL_GET;
+ if (!write && !(vma->vm_flags & VM_LOCKED) &&
+ (!vma->vm_ops || !vma->vm_ops->nopage))
+ foll_flags |= FOLL_ANON;
+
do {
- int write_access = write;
struct page *page;
- cond_resched_lock(&mm->page_table_lock);
- while (!(page = follow_page(mm, start, write_access))) {
- int ret;
-
- /*
- * Shortcut for anonymous pages. We don't want
- * to force the creation of pages tables for
- * insanely big anonymously mapped areas that
- * nobody touched so far. This is important
- * for doing a core dump for these mappings.
- */
- if (!write && untouched_anonymous_page(mm,vma,start)) {
- page = ZERO_PAGE(start);
- break;
- }
- spin_unlock(&mm->page_table_lock);
- ret = __handle_mm_fault(mm, vma, start, write_access);
+ if (write)
+ foll_flags |= FOLL_WRITE;
+ cond_resched();
+ while (!(page = follow_page(vma, start, foll_flags))) {
+ int ret;
+ ret = __handle_mm_fault(mm, vma, start,
+ foll_flags & FOLL_WRITE);
/*
* The VM_FAULT_WRITE bit tells us that do_wp_page has
* broken COW when necessary, even if maybe_mkwrite
* subsequent page lookups as if they were reads.
*/
if (ret & VM_FAULT_WRITE)
- write_access = 0;
+ foll_flags &= ~FOLL_WRITE;
switch (ret & ~VM_FAULT_WRITE) {
case VM_FAULT_MINOR:
default:
BUG();
}
- spin_lock(&mm->page_table_lock);
}
if (pages) {
pages[i] = page;
flush_dcache_page(page);
- if (!PageReserved(page))
- page_cache_get(page);
}
if (vmas)
vmas[i] = vma;
start += PAGE_SIZE;
len--;
} while (len && start < vma->vm_end);
- spin_unlock(&mm->page_table_lock);
} while (len);
return i;
}
unsigned long addr, unsigned long end, pgprot_t prot)
{
pte_t *pte;
+ spinlock_t *ptl;
- pte = pte_alloc_map(mm, pmd, addr);
+ pte = pte_alloc_map_lock(mm, pmd, addr, &ptl);
if (!pte)
return -ENOMEM;
do {
- pte_t zero_pte = pte_wrprotect(mk_pte(ZERO_PAGE(addr), prot));
+ struct page *page = ZERO_PAGE(addr);
+ pte_t zero_pte = pte_wrprotect(mk_pte(page, prot));
+ page_cache_get(page);
+ page_add_file_rmap(page);
+ inc_mm_counter(mm, file_rss);
BUG_ON(!pte_none(*pte));
set_pte_at(mm, addr, pte, zero_pte);
} while (pte++, addr += PAGE_SIZE, addr != end);
- pte_unmap(pte - 1);
+ pte_unmap_unlock(pte - 1, ptl);
return 0;
}
BUG_ON(addr >= end);
pgd = pgd_offset(mm, addr);
flush_cache_range(vma, addr, end);
- spin_lock(&mm->page_table_lock);
do {
next = pgd_addr_end(addr, end);
err = zeromap_pud_range(mm, pgd, addr, next, prot);
if (err)
break;
} while (pgd++, addr = next, addr != end);
- spin_unlock(&mm->page_table_lock);
return err;
}
+pte_t * fastcall get_locked_pte(struct mm_struct *mm, unsigned long addr, spinlock_t **ptl)
+{
+ pgd_t * pgd = pgd_offset(mm, addr);
+ pud_t * pud = pud_alloc(mm, pgd, addr);
+ if (pud) {
+ pmd_t * pmd = pmd_alloc(mm, pud, addr);
+ if (pmd)
+ return pte_alloc_map_lock(mm, pmd, addr, ptl);
+ }
+ return NULL;
+}
+
+/*
+ * This is the old fallback for page remapping.
+ *
+ * For historical reasons, it only allows reserved pages. Only
+ * old drivers should use this, and they needed to mark their
+ * pages reserved for the old functions anyway.
+ */
+static int insert_page(struct mm_struct *mm, unsigned long addr, struct page *page, pgprot_t prot)
+{
+ int retval;
+ pte_t *pte;
+ spinlock_t *ptl;
+
+ retval = -EINVAL;
+ if (PageAnon(page))
+ goto out;
+ retval = -ENOMEM;
+ flush_dcache_page(page);
+ pte = get_locked_pte(mm, addr, &ptl);
+ if (!pte)
+ goto out;
+ retval = -EBUSY;
+ if (!pte_none(*pte))
+ goto out_unlock;
+
+ /* Ok, finally just insert the thing.. */
+ get_page(page);
+ inc_mm_counter(mm, file_rss);
+ page_add_file_rmap(page);
+ set_pte_at(mm, addr, pte, mk_pte(page, prot));
+
+ retval = 0;
+out_unlock:
+ pte_unmap_unlock(pte, ptl);
+out:
+ return retval;
+}
+
+/*
+ * This allows drivers to insert individual pages they've allocated
+ * into a user vma.
+ *
+ * The page has to be a nice clean _individual_ kernel allocation.
+ * If you allocate a compound page, you need to have marked it as
+ * such (__GFP_COMP), or manually just split the page up yourself
+ * (which is mainly an issue of doing "set_page_count(page, 1)" for
+ * each sub-page, and then freeing them one by one when you free
+ * them rather than freeing it as a compound page).
+ *
+ * NOTE! Traditionally this was done with "remap_pfn_range()" which
+ * took an arbitrary page protection parameter. This doesn't allow
+ * that. Your vma protection will have to be set up correctly, which
+ * means that if you want a shared writable mapping, you'd better
+ * ask for a shared writable mapping!
+ *
+ * The page does not need to be reserved.
+ */
+int vm_insert_page(struct vm_area_struct *vma, unsigned long addr, struct page *page)
+{
+ if (addr < vma->vm_start || addr >= vma->vm_end)
+ return -EFAULT;
+ if (!page_count(page))
+ return -EINVAL;
+ vma->vm_flags |= VM_INSERTPAGE;
+ return insert_page(vma->vm_mm, addr, page, vma->vm_page_prot);
+}
+EXPORT_SYMBOL(vm_insert_page);
+
/*
* maps a range of physical memory into the requested pages. the old
* mappings are removed. any references to nonexistent pages results
unsigned long pfn, pgprot_t prot)
{
pte_t *pte;
+ spinlock_t *ptl;
- pte = pte_alloc_map(mm, pmd, addr);
+ pte = pte_alloc_map_lock(mm, pmd, addr, &ptl);
if (!pte)
return -ENOMEM;
do {
BUG_ON(!pte_none(*pte));
- if (!pfn_valid(pfn) || PageReserved(pfn_to_page(pfn)))
- set_pte_at(mm, addr, pte, pfn_pte(pfn, prot));
+ set_pte_at(mm, addr, pte, pfn_pte(pfn, prot));
pfn++;
} while (pte++, addr += PAGE_SIZE, addr != end);
- pte_unmap(pte - 1);
+ pte_unmap_unlock(pte - 1, ptl);
return 0;
}
* rest of the world about it:
* VM_IO tells people not to look at these pages
* (accesses can have side effects).
- * VM_RESERVED tells swapout not to try to touch
- * this region.
+ * VM_RESERVED is specified all over the place, because
+ * in 2.4 it kept swapout's vma scan off this vma; but
+ * in 2.6 the LRU scan won't even find its pages, so this
+ * flag means no more than count its pages in reserved_vm,
+ * and omit it from core dump, even when VM_IO turned off.
+ * VM_PFNMAP tells the core MM that the base pages are just
+ * raw PFN mappings, and do not have a "struct page" associated
+ * with them.
+ *
+ * There's a horrible special case to handle copy-on-write
+ * behaviour that some programs depend on. We mark the "original"
+ * un-COW'ed pages by matching them up with "vma->vm_pgoff".
*/
- vma->vm_flags |= VM_IO | VM_RESERVED;
+ if (is_cow_mapping(vma->vm_flags)) {
+ if (addr != vma->vm_start || end != vma->vm_end)
+ return -EINVAL;
+ vma->vm_pgoff = pfn;
+ }
+
+ vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;
BUG_ON(addr >= end);
pfn -= addr >> PAGE_SHIFT;
pgd = pgd_offset(mm, addr);
flush_cache_range(vma, addr, end);
- spin_lock(&mm->page_table_lock);
do {
next = pgd_addr_end(addr, end);
err = remap_pud_range(mm, pgd, addr, next,
if (err)
break;
} while (pgd++, addr = next, addr != end);
- spin_unlock(&mm->page_table_lock);
return err;
}
EXPORT_SYMBOL(remap_pfn_range);
struct page *pte_page;
pte = (mm == &init_mm) ?
- pte_alloc_kernel(mm, pmd, addr) :
+ pte_alloc_kernel(pmd, addr) :
pte_alloc_map(mm, pmd, addr);
if (!pte)
return -ENOMEM;
BUG_ON(addr >= end);
pgd = pgd_offset(mm, addr);
- spin_lock(&mm->page_table_lock);
do {
next = pgd_addr_end(addr, end);
err = generic_pud_range(mm, pgd, addr, next, fn, data);
if (err)
break;
} while (pgd++, addr = next, addr != end);
- spin_unlock(&mm->page_table_lock);
return err;
}
#endif
+/*
+ * handle_pte_fault chooses page fault handler according to an entry
+ * which was read non-atomically. Before making any commitment, on
+ * those architectures or configurations (e.g. i386 with PAE) which
+ * might give a mix of unmatched parts, do_swap_page and do_file_page
+ * must check under lock before unmapping the pte and proceeding
+ * (but do_wp_page is only called after already making such a check;
+ * and do_anonymous_page and do_no_page can safely check later on).
+ */
+static inline int pte_unmap_same(struct mm_struct *mm, pmd_t *pmd,
+ pte_t *page_table, pte_t orig_pte)
+{
+ int same = 1;
+#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT)
+ if (sizeof(pte_t) > sizeof(unsigned long)) {
+ spinlock_t *ptl = pte_lockptr(mm, pmd);
+ spin_lock(ptl);
+ same = pte_same(*page_table, orig_pte);
+ spin_unlock(ptl);
+ }
+#endif
+ pte_unmap(page_table);
+ return same;
+}
+
/*
* Do pte_mkwrite, but only if the vma says VM_WRITE. We do this when
* servicing faults for write access. In the normal case, do always want
return pte;
}
-/*
- * We hold the mm semaphore for reading and vma->vm_mm->page_table_lock
- */
-static inline void break_cow(struct vm_area_struct * vma, struct page * new_page, unsigned long address,
- pte_t *page_table)
+static inline void cow_user_page(struct page *dst, struct page *src, unsigned long va)
{
- pte_t entry;
+ /*
+ * If the source page was a PFN mapping, we don't have
+ * a "struct page" for it. We do a best-effort copy by
+ * just copying from the original user address. If that
+ * fails, we just zero-fill it. Live with it.
+ */
+ if (unlikely(!src)) {
+ void *kaddr = kmap_atomic(dst, KM_USER0);
+ void __user *uaddr = (void __user *)(va & PAGE_MASK);
- entry = maybe_mkwrite(pte_mkdirty(mk_pte(new_page, vma->vm_page_prot)),
- vma);
- ptep_establish(vma, address, page_table, entry);
- update_mmu_cache(vma, address, entry);
- lazy_mmu_prot_update(entry);
+ /*
+ * This really shouldn't fail, because the page is there
+ * in the page tables. But it might just be unreadable,
+ * in which case we just give up and fill the result with
+ * zeroes.
+ */
+ if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE))
+ memset(kaddr, 0, PAGE_SIZE);
+ kunmap_atomic(kaddr, KM_USER0);
+ return;
+
+ }
+ copy_user_highpage(dst, src, va);
}
/*
* to a shared page. It is done by copying the page to a new address
* and decrementing the shared-page counter for the old page.
*
- * Goto-purists beware: the only reason for goto's here is that it results
- * in better assembly code.. The "default" path will see no jumps at all.
- *
* Note that this routine assumes that the protection checks have been
* done by the caller (the low-level page fault routine in most cases).
* Thus we can safely just mark it writable once we've done any necessary
* change only once the write actually happens. This avoids a few races,
* and potentially makes it more efficient.
*
- * We hold the mm semaphore and the page_table_lock on entry and exit
- * with the page_table_lock released.
+ * We enter with non-exclusive mmap_sem (to exclude vma changes,
+ * but allow concurrent faults), with pte both mapped and locked.
+ * We return with mmap_sem still held, but pte unmapped and unlocked.
*/
-static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma,
- unsigned long address, pte_t *page_table, pmd_t *pmd, pte_t pte)
+static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
+ unsigned long address, pte_t *page_table, pmd_t *pmd,
+ spinlock_t *ptl, pte_t orig_pte)
{
struct page *old_page, *new_page;
- unsigned long pfn = pte_pfn(pte);
pte_t entry;
- int ret;
-#ifdef CONFIG_XEN
- struct page invalid_page;
-#endif
+ int ret = VM_FAULT_MINOR;
- if (unlikely(!pfn_valid(pfn))) {
-#ifdef CONFIG_XEN
- /* This can happen with /dev/mem (PROT_WRITE, MAP_PRIVATE). */
- invalid_page.flags = (1<<PG_reserved) | (1<<PG_locked);
- old_page = &invalid_page;
- } else {
- old_page = pfn_to_page(pfn);
-#else
- /*
- * This should really halt the system so it can be debugged or
- * at least the kernel stops what it's doing before it corrupts
- * data, but for the moment just pretend this is OOM.
- */
- pte_unmap(page_table);
- printk(KERN_ERR "do_wp_page: bogus page at address %08lx\n",
- address);
- spin_unlock(&mm->page_table_lock);
- return VM_FAULT_OOM;
-#endif
- }
-#ifndef CONFIG_XEN
- old_page = pfn_to_page(pfn);
-#endif
+ old_page = vm_normal_page(vma, address, orig_pte);
+ if (!old_page)
+ goto gotten;
if (PageAnon(old_page) && !TestSetPageLocked(old_page)) {
int reuse = can_share_swap_page(old_page);
unlock_page(old_page);
if (reuse) {
- flush_cache_page(vma, address, pfn);
- entry = maybe_mkwrite(pte_mkyoung(pte_mkdirty(pte)),
- vma);
+ flush_cache_page(vma, address, pte_pfn(orig_pte));
+ entry = pte_mkyoung(orig_pte);
+ entry = maybe_mkwrite(pte_mkdirty(entry), vma);
ptep_set_access_flags(vma, address, page_table, entry, 1);
update_mmu_cache(vma, address, entry);
lazy_mmu_prot_update(entry);
- pte_unmap(page_table);
- spin_unlock(&mm->page_table_lock);
- return VM_FAULT_MINOR|VM_FAULT_WRITE;
+ ret |= VM_FAULT_WRITE;
+ goto unlock;
}
}
- pte_unmap(page_table);
/*
* Ok, we need to copy. Oh, well..
*/
- if (!PageReserved(old_page))
- page_cache_get(old_page);
- spin_unlock(&mm->page_table_lock);
+ page_cache_get(old_page);
+gotten:
+ pte_unmap_unlock(page_table, ptl);
if (unlikely(anon_vma_prepare(vma)))
- goto no_new_page;
+ goto oom;
if (old_page == ZERO_PAGE(address)) {
new_page = alloc_zeroed_user_highpage(vma, address);
if (!new_page)
- goto no_new_page;
+ goto oom;
} else {
new_page = alloc_page_vma(GFP_HIGHUSER, vma, address);
if (!new_page)
- goto no_new_page;
-#ifndef CONFIG_XEN
- copy_user_highpage(new_page, old_page, address);
-#else
- if (old_page == &invalid_page) {
- char *vto = kmap_atomic(new_page, KM_USER1);
- copy_page(vto, (void *)(address & PAGE_MASK));
- kunmap_atomic(vto, KM_USER1);
- } else {
- copy_user_highpage(new_page, old_page, address);
- }
-#endif
+ goto oom;
+ cow_user_page(new_page, old_page, address);
}
+
/*
* Re-check the pte - we dropped the lock
*/
- ret = VM_FAULT_MINOR;
- spin_lock(&mm->page_table_lock);
- page_table = pte_offset_map(pmd, address);
- if (likely(pte_same(*page_table, pte))) {
- if (PageAnon(old_page))
- dec_mm_counter(mm, anon_rss);
- if (PageReserved(old_page))
- inc_mm_counter(mm, rss);
- else
+ page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
+ if (likely(pte_same(*page_table, orig_pte))) {
+ if (old_page) {
page_remove_rmap(old_page);
- flush_cache_page(vma, address, pfn);
- break_cow(vma, new_page, address, page_table);
+ if (!PageAnon(old_page)) {
+ dec_mm_counter(mm, file_rss);
+ inc_mm_counter(mm, anon_rss);
+ }
+ } else
+ inc_mm_counter(mm, anon_rss);
+ flush_cache_page(vma, address, pte_pfn(orig_pte));
+ entry = mk_pte(new_page, vma->vm_page_prot);
+ entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+ ptep_establish(vma, address, page_table, entry);
+ update_mmu_cache(vma, address, entry);
+ lazy_mmu_prot_update(entry);
lru_cache_add_active(new_page);
page_add_anon_rmap(new_page, vma, address);
new_page = old_page;
ret |= VM_FAULT_WRITE;
}
- pte_unmap(page_table);
- page_cache_release(new_page);
- page_cache_release(old_page);
- spin_unlock(&mm->page_table_lock);
+ if (new_page)
+ page_cache_release(new_page);
+ if (old_page)
+ page_cache_release(old_page);
+unlock:
+ pte_unmap_unlock(page_table, ptl);
return ret;
-
-no_new_page:
- page_cache_release(old_page);
+oom:
+ if (old_page)
+ page_cache_release(old_page);
return VM_FAULT_OOM;
}
restart_addr = zap_page_range(vma, start_addr,
end_addr - start_addr, details);
-
- /*
- * We cannot rely on the break test in unmap_vmas:
- * on the one hand, we don't want to restart our loop
- * just because that broke out for the page_table_lock;
- * on the other hand, it does no test when vma is small.
- */
need_break = need_resched() ||
need_lockbreak(details->i_mmap_lock);
}
/*
- * We hold the mm semaphore and the page_table_lock on entry and
- * should release the pagetable lock on exit..
+ * We enter with non-exclusive mmap_sem (to exclude vma changes,
+ * but allow concurrent faults), and pte mapped but not yet locked.
+ * We return with mmap_sem still held, but pte unmapped and unlocked.
*/
-static int do_swap_page(struct mm_struct * mm,
- struct vm_area_struct * vma, unsigned long address,
- pte_t *page_table, pmd_t *pmd, pte_t orig_pte, int write_access)
+static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
+ unsigned long address, pte_t *page_table, pmd_t *pmd,
+ int write_access, pte_t orig_pte)
{
+ spinlock_t *ptl;
struct page *page;
- swp_entry_t entry = pte_to_swp_entry(orig_pte);
+ swp_entry_t entry;
pte_t pte;
int ret = VM_FAULT_MINOR;
- pte_unmap(page_table);
- spin_unlock(&mm->page_table_lock);
+ if (!pte_unmap_same(mm, pmd, page_table, orig_pte))
+ goto out;
+
+ entry = pte_to_swp_entry(orig_pte);
page = lookup_swap_cache(entry);
if (!page) {
swapin_readahead(entry, address, vma);
page = read_swap_cache_async(entry, vma, address);
if (!page) {
/*
- * Back out if somebody else faulted in this pte while
- * we released the page table lock.
+ * Back out if somebody else faulted in this pte
+ * while we released the pte lock.
*/
- spin_lock(&mm->page_table_lock);
- page_table = pte_offset_map(pmd, address);
+ page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
if (likely(pte_same(*page_table, orig_pte)))
ret = VM_FAULT_OOM;
- else
- ret = VM_FAULT_MINOR;
- pte_unmap(page_table);
- spin_unlock(&mm->page_table_lock);
- goto out;
+ goto unlock;
}
/* Had to read the page from swap area: Major fault */
lock_page(page);
/*
- * Back out if somebody else faulted in this pte while we
- * released the page table lock.
+ * Back out if somebody else already faulted in this pte.
*/
- spin_lock(&mm->page_table_lock);
- page_table = pte_offset_map(pmd, address);
- if (unlikely(!pte_same(*page_table, orig_pte))) {
- ret = VM_FAULT_MINOR;
+ page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
+ if (unlikely(!pte_same(*page_table, orig_pte)))
goto out_nomap;
- }
if (unlikely(!PageUptodate(page))) {
ret = VM_FAULT_SIGBUS;
/* The page isn't present yet, go ahead with the fault. */
- inc_mm_counter(mm, rss);
+ inc_mm_counter(mm, anon_rss);
pte = mk_pte(page, vma->vm_page_prot);
if (write_access && can_share_swap_page(page)) {
pte = maybe_mkwrite(pte_mkdirty(pte), vma);
if (write_access) {
if (do_wp_page(mm, vma, address,
- page_table, pmd, pte) == VM_FAULT_OOM)
+ page_table, pmd, ptl, pte) == VM_FAULT_OOM)
ret = VM_FAULT_OOM;
goto out;
}
/* No need to invalidate - it was non-present before */
update_mmu_cache(vma, address, pte);
lazy_mmu_prot_update(pte);
- pte_unmap(page_table);
- spin_unlock(&mm->page_table_lock);
+unlock:
+ pte_unmap_unlock(page_table, ptl);
out:
return ret;
out_nomap:
- pte_unmap(page_table);
- spin_unlock(&mm->page_table_lock);
+ pte_unmap_unlock(page_table, ptl);
unlock_page(page);
page_cache_release(page);
- goto out;
+ return ret;
}
/*
- * We are called with the MM semaphore and page_table_lock
- * spinlock held to protect against concurrent faults in
- * multithreaded programs.
+ * We enter with non-exclusive mmap_sem (to exclude vma changes,
+ * but allow concurrent faults), and pte mapped but not yet locked.
+ * We return with mmap_sem still held, but pte unmapped and unlocked.
*/
-static int
-do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
- pte_t *page_table, pmd_t *pmd, int write_access,
- unsigned long addr)
+static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
+ unsigned long address, pte_t *page_table, pmd_t *pmd,
+ int write_access)
{
+ struct page *page;
+ spinlock_t *ptl;
pte_t entry;
- struct page * page = ZERO_PAGE(addr);
-
- /* Read-only mapping of ZERO_PAGE. */
- entry = pte_wrprotect(mk_pte(ZERO_PAGE(addr), vma->vm_page_prot));
- /* ..except if it's a write access */
if (write_access) {
/* Allocate our own private page. */
pte_unmap(page_table);
- spin_unlock(&mm->page_table_lock);
if (unlikely(anon_vma_prepare(vma)))
- goto no_mem;
- page = alloc_zeroed_user_highpage(vma, addr);
+ goto oom;
+ page = alloc_zeroed_user_highpage(vma, address);
if (!page)
- goto no_mem;
+ goto oom;
- spin_lock(&mm->page_table_lock);
- page_table = pte_offset_map(pmd, addr);
+ entry = mk_pte(page, vma->vm_page_prot);
+ entry = maybe_mkwrite(pte_mkdirty(entry), vma);
- if (!pte_none(*page_table)) {
- pte_unmap(page_table);
- page_cache_release(page);
- spin_unlock(&mm->page_table_lock);
- goto out;
- }
- inc_mm_counter(mm, rss);
- entry = maybe_mkwrite(pte_mkdirty(mk_pte(page,
- vma->vm_page_prot)),
- vma);
+ page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
+ if (!pte_none(*page_table))
+ goto release;
+ inc_mm_counter(mm, anon_rss);
lru_cache_add_active(page);
SetPageReferenced(page);
- page_add_anon_rmap(page, vma, addr);
+ page_add_anon_rmap(page, vma, address);
+ } else {
+ /* Map the ZERO_PAGE - vm_page_prot is readonly */
+ page = ZERO_PAGE(address);
+ page_cache_get(page);
+ entry = mk_pte(page, vma->vm_page_prot);
+
+ ptl = pte_lockptr(mm, pmd);
+ spin_lock(ptl);
+ if (!pte_none(*page_table))
+ goto release;
+ inc_mm_counter(mm, file_rss);
+ page_add_file_rmap(page);
}
- set_pte_at(mm, addr, page_table, entry);
- pte_unmap(page_table);
+ set_pte_at(mm, address, page_table, entry);
/* No need to invalidate - it was non-present before */
- update_mmu_cache(vma, addr, entry);
+ update_mmu_cache(vma, address, entry);
lazy_mmu_prot_update(entry);
- spin_unlock(&mm->page_table_lock);
-out:
+unlock:
+ pte_unmap_unlock(page_table, ptl);
return VM_FAULT_MINOR;
-no_mem:
+release:
+ page_cache_release(page);
+ goto unlock;
+oom:
return VM_FAULT_OOM;
}
* As this is called only for pages that do not currently exist, we
* do not need to flush old virtual caches or the TLB.
*
- * This is called with the MM semaphore held and the page table
- * spinlock held. Exit with the spinlock released.
+ * We enter with non-exclusive mmap_sem (to exclude vma changes,
+ * but allow concurrent faults), and pte mapped but not yet locked.
+ * We return with mmap_sem still held, but pte unmapped and unlocked.
*/
-static int
-do_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
- unsigned long address, int write_access, pte_t *page_table, pmd_t *pmd)
+static int do_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
+ unsigned long address, pte_t *page_table, pmd_t *pmd,
+ int write_access)
{
- struct page * new_page;
+ spinlock_t *ptl;
+ struct page *new_page;
struct address_space *mapping = NULL;
pte_t entry;
unsigned int sequence = 0;
int ret = VM_FAULT_MINOR;
int anon = 0;
- if (!vma->vm_ops || !vma->vm_ops->nopage)
- return do_anonymous_page(mm, vma, page_table,
- pmd, write_access, address);
pte_unmap(page_table);
- spin_unlock(&mm->page_table_lock);
+ BUG_ON(vma->vm_flags & VM_PFNMAP);
if (vma->vm_file) {
mapping = vma->vm_file->f_mapping;
smp_rmb(); /* serializes i_size against truncate_count */
}
retry:
- cond_resched();
new_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, &ret);
/*
* No smp_rmb is needed here as long as there's a full
anon = 1;
}
- spin_lock(&mm->page_table_lock);
+ page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
/*
* For a file-backed vma, someone could have truncated or otherwise
* invalidated this page. If unmap_mapping_range got called,
* retry getting the page.
*/
if (mapping && unlikely(sequence != mapping->truncate_count)) {
- sequence = mapping->truncate_count;
- spin_unlock(&mm->page_table_lock);
+ pte_unmap_unlock(page_table, ptl);
page_cache_release(new_page);
+ cond_resched();
+ sequence = mapping->truncate_count;
+ smp_rmb();
goto retry;
}
- page_table = pte_offset_map(pmd, address);
/*
* This silly early PAGE_DIRTY setting removes a race
*/
/* Only go through if we didn't race with anybody else... */
if (pte_none(*page_table)) {
- if (!PageReserved(new_page))
- inc_mm_counter(mm, rss);
-
flush_icache_page(vma, new_page);
entry = mk_pte(new_page, vma->vm_page_prot);
if (write_access)
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
set_pte_at(mm, address, page_table, entry);
if (anon) {
+ inc_mm_counter(mm, anon_rss);
lru_cache_add_active(new_page);
page_add_anon_rmap(new_page, vma, address);
- } else
+ } else {
+ inc_mm_counter(mm, file_rss);
page_add_file_rmap(new_page);
- pte_unmap(page_table);
+ }
} else {
/* One of our sibling threads was faster, back out. */
- pte_unmap(page_table);
page_cache_release(new_page);
- spin_unlock(&mm->page_table_lock);
- goto out;
+ goto unlock;
}
/* no need to invalidate: a not-present page shouldn't be cached */
update_mmu_cache(vma, address, entry);
lazy_mmu_prot_update(entry);
- spin_unlock(&mm->page_table_lock);
-out:
+unlock:
+ pte_unmap_unlock(page_table, ptl);
return ret;
oom:
page_cache_release(new_page);
- ret = VM_FAULT_OOM;
- goto out;
+ return VM_FAULT_OOM;
}
/*
* Fault of a previously existing named mapping. Repopulate the pte
* from the encoded file_pte if possible. This enables swappable
* nonlinear vmas.
+ *
+ * We enter with non-exclusive mmap_sem (to exclude vma changes,
+ * but allow concurrent faults), and pte mapped but not yet locked.
+ * We return with mmap_sem still held, but pte unmapped and unlocked.
*/
-static int do_file_page(struct mm_struct * mm, struct vm_area_struct * vma,
- unsigned long address, int write_access, pte_t *pte, pmd_t *pmd)
+static int do_file_page(struct mm_struct *mm, struct vm_area_struct *vma,
+ unsigned long address, pte_t *page_table, pmd_t *pmd,
+ int write_access, pte_t orig_pte)
{
- unsigned long pgoff;
+ pgoff_t pgoff;
int err;
- BUG_ON(!vma->vm_ops || !vma->vm_ops->nopage);
- /*
- * Fall back to the linear mapping if the fs does not support
- * ->populate:
- */
- if (!vma->vm_ops->populate ||
- (write_access && !(vma->vm_flags & VM_SHARED))) {
- pte_clear(mm, address, pte);
- return do_no_page(mm, vma, address, write_access, pte, pmd);
- }
-
- pgoff = pte_to_pgoff(*pte);
+ if (!pte_unmap_same(mm, pmd, page_table, orig_pte))
+ return VM_FAULT_MINOR;
- pte_unmap(pte);
- spin_unlock(&mm->page_table_lock);
+ if (unlikely(!(vma->vm_flags & VM_NONLINEAR))) {
+ /*
+ * Page table corrupted: show pte and kill process.
+ */
+ print_bad_pte(vma, orig_pte, address);
+ return VM_FAULT_OOM;
+ }
+ /* We can then assume vm->vm_ops && vma->vm_ops->populate */
- err = vma->vm_ops->populate(vma, address & PAGE_MASK, PAGE_SIZE, vma->vm_page_prot, pgoff, 0);
+ pgoff = pte_to_pgoff(orig_pte);
+ err = vma->vm_ops->populate(vma, address & PAGE_MASK, PAGE_SIZE,
+ vma->vm_page_prot, pgoff, 0);
if (err == -ENOMEM)
return VM_FAULT_OOM;
if (err)
* with external mmu caches can use to update those (ie the Sparc or
* PowerPC hashed page tables that act as extended TLBs).
*
- * Note the "page_table_lock". It is to protect against kswapd removing
- * pages from under us. Note that kswapd only ever _removes_ pages, never
- * adds them. As such, once we have noticed that the page is not present,
- * we can drop the lock early.
- *
- * The adding of pages is protected by the MM semaphore (which we hold),
- * so we don't need to worry about a page being suddenly been added into
- * our VM.
- *
- * We enter with the pagetable spinlock held, we are supposed to
- * release it when done.
+ * We enter with non-exclusive mmap_sem (to exclude vma changes,
+ * but allow concurrent faults), and pte mapped but not yet locked.
+ * We return with mmap_sem still held, but pte unmapped and unlocked.
*/
static inline int handle_pte_fault(struct mm_struct *mm,
- struct vm_area_struct * vma, unsigned long address,
- int write_access, pte_t *pte, pmd_t *pmd)
+ struct vm_area_struct *vma, unsigned long address,
+ pte_t *pte, pmd_t *pmd, int write_access)
{
pte_t entry;
+ pte_t old_entry;
+ spinlock_t *ptl;
- entry = *pte;
+ old_entry = entry = *pte;
if (!pte_present(entry)) {
- /*
- * If it truly wasn't present, we know that kswapd
- * and the PTE updates will not touch it later. So
- * drop the lock.
- */
- if (pte_none(entry))
- return do_no_page(mm, vma, address, write_access, pte, pmd);
+ if (pte_none(entry)) {
+ if (!vma->vm_ops || !vma->vm_ops->nopage)
+ return do_anonymous_page(mm, vma, address,
+ pte, pmd, write_access);
+ return do_no_page(mm, vma, address,
+ pte, pmd, write_access);
+ }
if (pte_file(entry))
- return do_file_page(mm, vma, address, write_access, pte, pmd);
- return do_swap_page(mm, vma, address, pte, pmd, entry, write_access);
+ return do_file_page(mm, vma, address,
+ pte, pmd, write_access, entry);
+ return do_swap_page(mm, vma, address,
+ pte, pmd, write_access, entry);
}
+ ptl = pte_lockptr(mm, pmd);
+ spin_lock(ptl);
+ if (unlikely(!pte_same(*pte, entry)))
+ goto unlock;
if (write_access) {
if (!pte_write(entry))
- return do_wp_page(mm, vma, address, pte, pmd, entry);
+ return do_wp_page(mm, vma, address,
+ pte, pmd, ptl, entry);
entry = pte_mkdirty(entry);
}
entry = pte_mkyoung(entry);
- ptep_set_access_flags(vma, address, pte, entry, write_access);
- update_mmu_cache(vma, address, entry);
- lazy_mmu_prot_update(entry);
- pte_unmap(pte);
- spin_unlock(&mm->page_table_lock);
+ if (!pte_same(old_entry, entry)) {
+ ptep_set_access_flags(vma, address, pte, entry, write_access);
+ update_mmu_cache(vma, address, entry);
+ lazy_mmu_prot_update(entry);
+ } else {
+ /*
+ * This is needed only for protection faults but the arch code
+ * is not yet telling us if this is a protection fault or not.
+ * This still avoids useless tlb flushes for .text page faults
+ * with threads.
+ */
+ if (write_access)
+ flush_tlb_page(vma, address);
+ }
+unlock:
+ pte_unmap_unlock(pte, ptl);
return VM_FAULT_MINOR;
}
/*
* By the time we get here, we already hold the mm semaphore
*/
-int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma,
+int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, int write_access)
{
pgd_t *pgd;
if (unlikely(is_vm_hugetlb_page(vma)))
return hugetlb_fault(mm, vma, address, write_access);
- /*
- * We need the page table lock to synchronize with kswapd
- * and the SMP-safe atomic PTE updates.
- */
pgd = pgd_offset(mm, address);
- spin_lock(&mm->page_table_lock);
-
pud = pud_alloc(mm, pgd, address);
if (!pud)
- goto oom;
-
+ return VM_FAULT_OOM;
pmd = pmd_alloc(mm, pud, address);
if (!pmd)
- goto oom;
-
+ return VM_FAULT_OOM;
pte = pte_alloc_map(mm, pmd, address);
if (!pte)
- goto oom;
-
- return handle_pte_fault(mm, vma, address, write_access, pte, pmd);
+ return VM_FAULT_OOM;
- oom:
- spin_unlock(&mm->page_table_lock);
- return VM_FAULT_OOM;
+ return handle_pte_fault(mm, vma, address, pte, pmd, write_access);
}
#ifndef __PAGETABLE_PUD_FOLDED
/*
* Allocate page upper directory.
- *
- * We've already handled the fast-path in-line, and we own the
- * page table lock.
+ * We've already handled the fast-path in-line.
*/
-pud_t fastcall *__pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
+int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
{
- pud_t *new;
-
- spin_unlock(&mm->page_table_lock);
- new = pud_alloc_one(mm, address);
- spin_lock(&mm->page_table_lock);
+ pud_t *new = pud_alloc_one(mm, address);
if (!new)
- return NULL;
+ return -ENOMEM;
- /*
- * Because we dropped the lock, we should re-check the
- * entry, as somebody else could have populated it..
- */
- if (pgd_present(*pgd)) {
+ spin_lock(&mm->page_table_lock);
+ if (pgd_present(*pgd)) /* Another has populated it */
pud_free(new);
- goto out;
- }
- pgd_populate(mm, pgd, new);
- out:
- return pud_offset(pgd, address);
+ else
+ pgd_populate(mm, pgd, new);
+ spin_unlock(&mm->page_table_lock);
+ return 0;
+}
+#else
+/* Workaround for gcc 2.96 */
+int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address)
+{
+ return 0;
}
#endif /* __PAGETABLE_PUD_FOLDED */
#ifndef __PAGETABLE_PMD_FOLDED
/*
* Allocate page middle directory.
- *
- * We've already handled the fast-path in-line, and we own the
- * page table lock.
+ * We've already handled the fast-path in-line.
*/
-pmd_t fastcall *__pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
+int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
{
- pmd_t *new;
-
- spin_unlock(&mm->page_table_lock);
- new = pmd_alloc_one(mm, address);
- spin_lock(&mm->page_table_lock);
+ pmd_t *new = pmd_alloc_one(mm, address);
if (!new)
- return NULL;
+ return -ENOMEM;
- /*
- * Because we dropped the lock, we should re-check the
- * entry, as somebody else could have populated it..
- */
+ spin_lock(&mm->page_table_lock);
#ifndef __ARCH_HAS_4LEVEL_HACK
- if (pud_present(*pud)) {
+ if (pud_present(*pud)) /* Another has populated it */
pmd_free(new);
- goto out;
- }
- pud_populate(mm, pud, new);
+ else
+ pud_populate(mm, pud, new);
#else
- if (pgd_present(*pud)) {
+ if (pgd_present(*pud)) /* Another has populated it */
pmd_free(new);
- goto out;
- }
- pgd_populate(mm, pud, new);
+ else
+ pgd_populate(mm, pud, new);
#endif /* __ARCH_HAS_4LEVEL_HACK */
-
- out:
- return pmd_offset(pud, address);
+ spin_unlock(&mm->page_table_lock);
+ return 0;
+}
+#else
+/* Workaround for gcc 2.96 */
+int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
+{
+ return 0;
}
#endif /* __PAGETABLE_PMD_FOLDED */
EXPORT_SYMBOL(vmalloc_to_pfn);
-/*
- * update_mem_hiwater
- * - update per process rss and vm high water data
- */
-void update_mem_hiwater(struct task_struct *tsk)
-{
- if (tsk->mm) {
- unsigned long rss = get_mm_counter(tsk->mm, rss);
-
- if (tsk->mm->hiwater_rss < rss)
- tsk->mm->hiwater_rss = rss;
- if (tsk->mm->hiwater_vm < tsk->mm->total_vm)
- tsk->mm->hiwater_vm = tsk->mm->total_vm;
- }
-}
-
#if !defined(__HAVE_ARCH_GATE_AREA)
#if defined(AT_SYSINFO_EHDR)
return -ENOMEM;
}
-EXPORT_SYMBOL(sysctl_overcommit_memory);
-EXPORT_SYMBOL(sysctl_overcommit_ratio);
-EXPORT_SYMBOL(sysctl_max_map_count);
-EXPORT_SYMBOL(vm_committed_space);
EXPORT_SYMBOL(__vm_enough_memory);
/*
}
/*
- * Remove one vm structure and free it.
+ * Unlink a file-based vm structure from its prio_tree, to hide
+ * vma from rmap and vmtruncate before freeing its page tables.
*/
-static void remove_vm_struct(struct vm_area_struct *vma)
+void unlink_file_vma(struct vm_area_struct *vma)
{
struct file *file = vma->vm_file;
- might_sleep();
if (file) {
struct address_space *mapping = file->f_mapping;
spin_lock(&mapping->i_mmap_lock);
__remove_shared_vm_struct(vma, file, mapping);
spin_unlock(&mapping->i_mmap_lock);
}
+}
+
+/*
+ * Close a vm structure and free it, returning the next.
+ */
+static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
+{
+ struct vm_area_struct *next = vma->vm_next;
+
+ might_sleep();
if (vma->vm_ops && vma->vm_ops->close)
vma->vm_ops->close(vma);
- if (file)
- fput(file);
- anon_vma_unlink(vma);
+ if (vma->vm_file)
+ fput(vma->vm_file);
mpol_free(vma_policy(vma));
kmem_cache_free(vm_area_cachep, vma);
+ return next;
}
asmlinkage unsigned long sys_brk(unsigned long brk)
* If the vma has a ->close operation then the driver probably needs to release
* per-vma resources, so we don't attempt to merge those.
*/
-#define VM_SPECIAL (VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED)
+#define VM_SPECIAL (VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_PFNMAP)
static inline int is_mergeable_vma(struct vm_area_struct *vma,
struct file *file, unsigned long vm_flags)
}
#ifdef CONFIG_PROC_FS
-void __vm_stat_account(struct mm_struct *mm, unsigned long flags,
+void vm_stat_account(struct mm_struct *mm, unsigned long flags,
struct file *file, long pages)
{
const unsigned long stack_flags
}
out:
mm->total_vm += len >> PAGE_SHIFT;
- __vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT);
+ vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT);
if (vm_flags & VM_LOCKED) {
mm->locked_vm += len >> PAGE_SHIFT;
make_pages_present(addr, addr + len);
mm->total_vm += grow;
if (vma->vm_flags & VM_LOCKED)
mm->locked_vm += grow;
- __vm_stat_account(mm, vma->vm_flags, vma->vm_file, grow);
+ vm_stat_account(mm, vma->vm_flags, vma->vm_file, grow);
return 0;
}
-#ifdef CONFIG_STACK_GROWSUP
+#if defined(CONFIG_STACK_GROWSUP) || defined(CONFIG_IA64)
/*
- * vma is the first one with address > vma->vm_end. Have to extend vma.
+ * PA-RISC uses this for its stack; IA64 for its Register Backing Store.
+ * vma is the last one with address > vma->vm_end. Have to extend vma.
*/
-int expand_stack(struct vm_area_struct * vma, unsigned long address)
+#ifndef CONFIG_IA64
+static inline
+#endif
+int expand_upwards(struct vm_area_struct *vma, unsigned long address)
{
int error;
anon_vma_unlock(vma);
return error;
}
+#endif /* CONFIG_STACK_GROWSUP || CONFIG_IA64 */
+
+#ifdef CONFIG_STACK_GROWSUP
+int expand_stack(struct vm_area_struct *vma, unsigned long address)
+{
+ return expand_upwards(vma, address);
+}
struct vm_area_struct *
find_extend_vma(struct mm_struct *mm, unsigned long addr)
}
#endif
-/* Normal function to fix up a mapping
- * This function is the default for when an area has no specific
- * function. This may be used as part of a more specific routine.
- *
- * By the time this function is called, the area struct has been
- * removed from the process mapping list.
- */
-static void unmap_vma(struct mm_struct *mm, struct vm_area_struct *area)
-{
- size_t len = area->vm_end - area->vm_start;
-
- area->vm_mm->total_vm -= len >> PAGE_SHIFT;
- if (area->vm_flags & VM_LOCKED)
- area->vm_mm->locked_vm -= len >> PAGE_SHIFT;
- vm_stat_unaccount(area);
- remove_vm_struct(area);
-}
-
/*
- * Update the VMA and inode share lists.
- *
- * Ok - we have the memory areas we should free on the 'free' list,
+ * Ok - we have the memory areas we should free on the vma list,
* so release them, and do the vma updates.
+ *
+ * Called with the mm semaphore held.
*/
-static void unmap_vma_list(struct mm_struct *mm, struct vm_area_struct *vma)
+static void remove_vma_list(struct mm_struct *mm, struct vm_area_struct *vma)
{
+ /* Update high watermark before we lower total_vm */
+ update_hiwater_vm(mm);
do {
- struct vm_area_struct *next = vma->vm_next;
- unmap_vma(mm, vma);
- vma = next;
+ long nrpages = vma_pages(vma);
+
+ mm->total_vm -= nrpages;
+ if (vma->vm_flags & VM_LOCKED)
+ mm->locked_vm -= nrpages;
+ vm_stat_account(mm, vma->vm_flags, vma->vm_file, -nrpages);
+ vma = remove_vma(vma);
} while (vma);
validate_mm(mm);
}
unsigned long nr_accounted = 0;
lru_add_drain();
- spin_lock(&mm->page_table_lock);
tlb = tlb_gather_mmu(mm, 0);
- unmap_vmas(&tlb, mm, vma, start, end, &nr_accounted, NULL);
+ update_hiwater_rss(mm);
+ unmap_vmas(&tlb, vma, start, end, &nr_accounted, NULL);
vm_unacct_memory(nr_accounted);
free_pgtables(&tlb, vma, prev? prev->vm_end: FIRST_USER_ADDRESS,
next? next->vm_start: 0);
tlb_finish_mmu(tlb, start, end);
- spin_unlock(&mm->page_table_lock);
}
/*
unmap_region(mm, vma, prev, start, end);
/* Fix up all other VM information */
- unmap_vma_list(mm, vma);
+ remove_vma_list(mm, vma);
return 0;
}
static inline void verify_mm_writelocked(struct mm_struct *mm)
{
-#ifdef CONFIG_DEBUG_KERNEL
+#ifdef CONFIG_DEBUG_VM
if (unlikely(down_read_trylock(&mm->mmap_sem))) {
WARN_ON(1);
up_read(&mm->mmap_sem);
#endif
lru_add_drain();
-
- spin_lock(&mm->page_table_lock);
-
flush_cache_mm(mm);
tlb = tlb_gather_mmu(mm, 1);
+ /* Don't update_hiwater_rss(mm) here, do_exit already did */
/* Use -1 here to ensure all VMAs in the mm are unmapped */
- end = unmap_vmas(&tlb, mm, vma, 0, -1, &nr_accounted, NULL);
+ end = unmap_vmas(&tlb, vma, 0, -1, &nr_accounted, NULL);
vm_unacct_memory(nr_accounted);
free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, 0);
tlb_finish_mmu(tlb, 0, end);
- mm->mmap = mm->mmap_cache = NULL;
- mm->mm_rb = RB_ROOT;
- set_mm_counter(mm, rss, 0);
- mm->total_vm = 0;
- mm->locked_vm = 0;
-
- spin_unlock(&mm->page_table_lock);
-
/*
- * Walk the list again, actually closing and freeing it
- * without holding any MM locks.
+ * Walk the list again, actually closing and freeing it,
+ * with preemption enabled, without holding any MM locks.
*/
- while (vma) {
- struct vm_area_struct *next = vma->vm_next;
- remove_vm_struct(vma);
- vma = next;
- }
+ while (vma)
+ vma = remove_vma(vma);
BUG_ON(mm->nr_ptes > (FIRST_USER_ADDRESS+PMD_SIZE-1)>>PMD_SHIFT);
}
#include <linux/sysctl.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
+#include <linux/memory_hotplug.h>
#include <linux/nodemask.h>
#include <linux/vmalloc.h>
* NORMAL allocation will leave 784M/256 of ram reserved in the ZONE_DMA
* HIGHMEM allocation will leave 224M/32 of ram reserved in ZONE_NORMAL
* HIGHMEM allocation will (224M+784M)/256 of ram reserved in ZONE_DMA
+ *
+ * TBD: should special case ZONE_DMA32 machines here - in those we normally
+ * don't need any ZONE_NORMAL reservation
*/
-int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = { 256, 32 };
+int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = { 256, 256, 32 };
EXPORT_SYMBOL(totalram_pages);
-EXPORT_SYMBOL(nr_swap_pages);
/*
* Used by page_zone() to look up the address of the struct zone whose
struct zone *zone_table[1 << ZONETABLE_SHIFT] __read_mostly;
EXPORT_SYMBOL(zone_table);
-static char *zone_names[MAX_NR_ZONES] = { "DMA", "Normal", "HighMem" };
+static char *zone_names[MAX_NR_ZONES] = { "DMA", "DMA32", "Normal", "HighMem" };
int min_free_kbytes = 1024;
unsigned long __initdata nr_kernel_pages;
unsigned long __initdata nr_all_pages;
+static int page_outside_zone_boundaries(struct zone *zone, struct page *page)
+{
+ int ret = 0;
+ unsigned seq;
+ unsigned long pfn = page_to_pfn(page);
+
+ do {
+ seq = zone_span_seqbegin(zone);
+ if (pfn >= zone->zone_start_pfn + zone->spanned_pages)
+ ret = 1;
+ else if (pfn < zone->zone_start_pfn)
+ ret = 1;
+ } while (zone_span_seqretry(zone, seq));
+
+ return ret;
+}
+
+static int page_is_consistent(struct zone *zone, struct page *page)
+{
+#ifdef CONFIG_HOLES_IN_ZONE
+ if (!pfn_valid(page_to_pfn(page)))
+ return 0;
+#endif
+ if (zone != page_zone(page))
+ return 0;
+
+ return 1;
+}
/*
* Temporary debugging check for pages not lying within a given zone.
*/
static int bad_range(struct zone *zone, struct page *page)
{
- if (page_to_pfn(page) >= zone->zone_start_pfn + zone->spanned_pages)
- return 1;
- if (page_to_pfn(page) < zone->zone_start_pfn)
- return 1;
-#ifdef CONFIG_HOLES_IN_ZONE
- if (!pfn_valid(page_to_pfn(page)))
+ if (page_outside_zone_boundaries(zone, page))
return 1;
-#endif
- if (zone != page_zone(page))
+ if (!page_is_consistent(zone, page))
return 1;
+
return 0;
}
printk(KERN_EMERG "Bad page state at %s (in process '%s', page %p)\n",
function, current->comm, page);
printk(KERN_EMERG "flags:0x%0*lx mapping:%p mapcount:%d count:%d\n",
- (int)(2*sizeof(page_flags_t)), (unsigned long)page->flags,
+ (int)(2*sizeof(unsigned long)), (unsigned long)page->flags,
page->mapping, page_mapcount(page), page_count(page));
printk(KERN_EMERG "Backtrace:\n");
dump_stack();
1 << PG_reclaim |
1 << PG_slab |
1 << PG_swapcache |
- 1 << PG_writeback);
+ 1 << PG_writeback );
set_page_count(page, 0);
reset_page_mapcount(page);
page->mapping = NULL;
add_taint(TAINT_BAD_PAGE);
}
-#ifndef CONFIG_HUGETLB_PAGE
-#define prep_compound_page(page, order) do { } while (0)
-#define destroy_compound_page(page, order) do { } while (0)
-#else
/*
* Higher-order pages are called "compound pages". They are structured thusly:
*
struct page *p = page + i;
SetPageCompound(p);
- p->private = (unsigned long)page;
+ set_page_private(p, (unsigned long)page);
}
}
if (!PageCompound(p))
bad_page(__FUNCTION__, page);
- if (p->private != (unsigned long)page)
+ if (page_private(p) != (unsigned long)page)
bad_page(__FUNCTION__, page);
ClearPageCompound(p);
}
}
-#endif /* CONFIG_HUGETLB_PAGE */
/*
* function for dealing with page's order in buddy system.
* So, we don't need atomic page->flags operations here.
*/
static inline unsigned long page_order(struct page *page) {
- return page->private;
+ return page_private(page);
}
static inline void set_page_order(struct page *page, int order) {
- page->private = order;
+ set_page_private(page, order);
__SetPagePrivate(page);
}
static inline void rmv_page_order(struct page *page)
{
__ClearPagePrivate(page);
- page->private = 0;
+ set_page_private(page, 0);
}
/*
* (a) the buddy is free &&
* (b) the buddy is on the buddy system &&
* (c) a page and its buddy have the same order.
- * for recording page's order, we use page->private and PG_private.
+ * for recording page's order, we use page_private(page) and PG_private.
*
*/
static inline int page_is_buddy(struct page *page, int order)
{
if (PagePrivate(page) &&
(page_order(page) == order) &&
- !PageReserved(page) &&
page_count(page) == 0)
return 1;
return 0;
* parts of the VM system.
* At each level, we keep a list of pages, which are heads of continuous
* free pages of length of (1 << order) and marked with PG_Private.Page's
- * order is recorded in page->private field.
+ * order is recorded in page_private(page) field.
* So when we are allocating or freeing one, we can derive the state of the
* other. That is, if we allocate a small block, and both were
* free, the remainder of the region must be split into blocks.
zone->free_area[order].nr_free++;
}
-static inline void free_pages_check(const char *function, struct page *page)
+static inline int free_pages_check(const char *function, struct page *page)
{
if ( page_mapcount(page) ||
page->mapping != NULL ||
1 << PG_reclaim |
1 << PG_slab |
1 << PG_swapcache |
- 1 << PG_writeback )))
+ 1 << PG_writeback |
+ 1 << PG_reserved )))
bad_page(function, page);
if (PageDirty(page))
__ClearPageDirty(page);
+ /*
+ * For now, we report if PG_reserved was found set, but do not
+ * clear it, and do not free the page. But we shall soon need
+ * to do more, for when the ZERO_PAGE count wraps negative.
+ */
+ return PageReserved(page);
}
/*
{
LIST_HEAD(list);
int i;
+ int reserved = 0;
if (arch_free_page(page, order))
return;
- mod_page_state(pgfree, 1 << order);
-
#ifndef CONFIG_MMU
if (order > 0)
for (i = 1 ; i < (1 << order) ; ++i)
#endif
for (i = 0 ; i < (1 << order) ; ++i)
- free_pages_check(__FUNCTION__, page + i);
+ reserved += free_pages_check(__FUNCTION__, page + i);
+ if (reserved)
+ return;
+
list_add(&page->lru, &list);
+ mod_page_state(pgfree, 1 << order);
kernel_map_pages(page, 1<<order, 0);
free_pages_bulk(page_zone(page), 1, &list, order);
}
/*
* This page is about to be returned from the page allocator
*/
-static void prep_new_page(struct page *page, int order)
+static int prep_new_page(struct page *page, int order)
{
if ( page_mapcount(page) ||
page->mapping != NULL ||
1 << PG_reclaim |
1 << PG_slab |
1 << PG_swapcache |
- 1 << PG_writeback )))
+ 1 << PG_writeback |
+ 1 << PG_reserved )))
bad_page(__FUNCTION__, page);
+ /*
+ * For now, we report if PG_reserved was found set, but do not
+ * clear it, and do not allocate the page: as a safety net.
+ */
+ if (PageReserved(page))
+ return 1;
+
page->flags &= ~(1 << PG_uptodate | 1 << PG_error |
1 << PG_referenced | 1 << PG_arch_1 |
1 << PG_checked | 1 << PG_mappedtodisk);
- page->private = 0;
+ set_page_private(page, 0);
set_page_refs(page, order);
kernel_map_pages(page, 1 << order, 1);
+ return 0;
}
/*
if (arch_free_page(page, 0))
return;
- kernel_map_pages(page, 1, 0);
- inc_page_state(pgfree);
if (PageAnon(page))
page->mapping = NULL;
- free_pages_check(__FUNCTION__, page);
+ if (free_pages_check(__FUNCTION__, page))
+ return;
+
+ inc_page_state(pgfree);
+ kernel_map_pages(page, 1, 0);
+
pcp = &zone_pcp(zone, get_cpu())->pcp[cold];
local_irq_save(flags);
list_add(&page->lru, &pcp->list);
buffered_rmqueue(struct zone *zone, int order, gfp_t gfp_flags)
{
unsigned long flags;
- struct page *page = NULL;
+ struct page *page;
int cold = !!(gfp_flags & __GFP_COLD);
+again:
if (order == 0) {
struct per_cpu_pages *pcp;
+ page = NULL;
pcp = &zone_pcp(zone, get_cpu())->pcp[cold];
local_irq_save(flags);
if (pcp->count <= pcp->low)
}
local_irq_restore(flags);
put_cpu();
- }
-
- if (page == NULL) {
+ } else {
spin_lock_irqsave(&zone->lock, flags);
page = __rmqueue(zone, order);
spin_unlock_irqrestore(&zone->lock, flags);
if (page != NULL) {
BUG_ON(bad_range(zone, page));
mod_page_state_zone(zone, pgalloc, 1 << order);
- prep_new_page(page, order);
+ if (prep_new_page(page, order))
+ goto again;
if (gfp_flags & __GFP_ZERO)
prep_zero_page(page, order, gfp_flags);
return page;
}
+#define ALLOC_NO_WATERMARKS 0x01 /* don't check watermarks at all */
+#define ALLOC_WMARK_MIN 0x02 /* use pages_min watermark */
+#define ALLOC_WMARK_LOW 0x04 /* use pages_low watermark */
+#define ALLOC_WMARK_HIGH 0x08 /* use pages_high watermark */
+#define ALLOC_HARDER 0x10 /* try to alloc harder */
+#define ALLOC_HIGH 0x20 /* __GFP_HIGH set */
+#define ALLOC_CPUSET 0x40 /* check for correct cpuset */
+
/*
* Return 1 if free pages are above 'mark'. This takes into account the order
* of the allocation.
*/
int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
- int classzone_idx, int can_try_harder, int gfp_high)
+ int classzone_idx, int alloc_flags)
{
/* free_pages my go negative - that's OK */
long min = mark, free_pages = z->free_pages - (1 << order) + 1;
int o;
- if (gfp_high)
+ if (alloc_flags & ALLOC_HIGH)
min -= min / 2;
- if (can_try_harder)
+ if (alloc_flags & ALLOC_HARDER)
min -= min / 4;
if (free_pages <= min + z->lowmem_reserve[classzone_idx])
return 1;
}
-static inline int
-should_reclaim_zone(struct zone *z, gfp_t gfp_mask)
+/*
+ * get_page_from_freeliest goes through the zonelist trying to allocate
+ * a page.
+ */
+static struct page *
+get_page_from_freelist(gfp_t gfp_mask, unsigned int order,
+ struct zonelist *zonelist, int alloc_flags)
{
- if (!z->reclaim_pages)
- return 0;
- if (gfp_mask & __GFP_NORECLAIM)
- return 0;
- return 1;
+ struct zone **z = zonelist->zones;
+ struct page *page = NULL;
+ int classzone_idx = zone_idx(*z);
+
+ /*
+ * Go through the zonelist once, looking for a zone with enough free.
+ * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
+ */
+ do {
+ if ((alloc_flags & ALLOC_CPUSET) &&
+ !cpuset_zone_allowed(*z, gfp_mask))
+ continue;
+
+ if (!(alloc_flags & ALLOC_NO_WATERMARKS)) {
+ unsigned long mark;
+ if (alloc_flags & ALLOC_WMARK_MIN)
+ mark = (*z)->pages_min;
+ else if (alloc_flags & ALLOC_WMARK_LOW)
+ mark = (*z)->pages_low;
+ else
+ mark = (*z)->pages_high;
+ if (!zone_watermark_ok(*z, order, mark,
+ classzone_idx, alloc_flags))
+ continue;
+ }
+
+ page = buffered_rmqueue(*z, order, gfp_mask);
+ if (page) {
+ zone_statistics(zonelist, *z);
+ break;
+ }
+ } while (*(++z) != NULL);
+ return page;
}
/*
__alloc_pages(gfp_t gfp_mask, unsigned int order,
struct zonelist *zonelist)
{
- const int wait = gfp_mask & __GFP_WAIT;
- struct zone **zones, *z;
+ const gfp_t wait = gfp_mask & __GFP_WAIT;
+ struct zone **z;
struct page *page;
struct reclaim_state reclaim_state;
struct task_struct *p = current;
- int i;
- int classzone_idx;
int do_retry;
- int can_try_harder;
+ int alloc_flags;
int did_some_progress;
might_sleep_if(wait);
- /*
- * The caller may dip into page reserves a bit more if the caller
- * cannot run direct reclaim, or is the caller has realtime scheduling
- * policy
- */
- can_try_harder = (unlikely(rt_task(p)) && !in_interrupt()) || !wait;
-
- zones = zonelist->zones; /* the list of zones suitable for gfp_mask */
+restart:
+ z = zonelist->zones; /* the list of zones suitable for gfp_mask */
- if (unlikely(zones[0] == NULL)) {
+ if (unlikely(*z == NULL)) {
/* Should this ever happen?? */
return NULL;
}
- classzone_idx = zone_idx(zones[0]);
+ page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
+ zonelist, ALLOC_WMARK_LOW|ALLOC_CPUSET);
+ if (page)
+ goto got_pg;
+
+ do {
+ wakeup_kswapd(*z, order);
+ } while (*(++z));
-restart:
/*
- * Go through the zonelist once, looking for a zone with enough free.
- * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
+ * OK, we're below the kswapd watermark and have kicked background
+ * reclaim. Now things get more complex, so set up alloc_flags according
+ * to how we want to proceed.
+ *
+ * The caller may dip into page reserves a bit more if the caller
+ * cannot run direct reclaim, or if the caller has realtime scheduling
+ * policy.
*/
- for (i = 0; (z = zones[i]) != NULL; i++) {
- int do_reclaim = should_reclaim_zone(z, gfp_mask);
-
- if (!cpuset_zone_allowed(z, __GFP_HARDWALL))
- continue;
-
- /*
- * If the zone is to attempt early page reclaim then this loop
- * will try to reclaim pages and check the watermark a second
- * time before giving up and falling back to the next zone.
- */
-zone_reclaim_retry:
- if (!zone_watermark_ok(z, order, z->pages_low,
- classzone_idx, 0, 0)) {
- if (!do_reclaim)
- continue;
- else {
- zone_reclaim(z, gfp_mask, order);
- /* Only try reclaim once */
- do_reclaim = 0;
- goto zone_reclaim_retry;
- }
- }
-
- page = buffered_rmqueue(z, order, gfp_mask);
- if (page)
- goto got_pg;
- }
-
- for (i = 0; (z = zones[i]) != NULL; i++)
- wakeup_kswapd(z, order);
+ alloc_flags = ALLOC_WMARK_MIN;
+ if ((unlikely(rt_task(p)) && !in_interrupt()) || !wait)
+ alloc_flags |= ALLOC_HARDER;
+ if (gfp_mask & __GFP_HIGH)
+ alloc_flags |= ALLOC_HIGH;
+ if (wait)
+ alloc_flags |= ALLOC_CPUSET;
/*
* Go through the zonelist again. Let __GFP_HIGH and allocations
- * coming from realtime tasks to go deeper into reserves
+ * coming from realtime tasks go deeper into reserves.
*
* This is the last chance, in general, before the goto nopage.
* Ignore cpuset if GFP_ATOMIC (!wait) rather than fail alloc.
* See also cpuset_zone_allowed() comment in kernel/cpuset.c.
*/
- for (i = 0; (z = zones[i]) != NULL; i++) {
- if (!zone_watermark_ok(z, order, z->pages_min,
- classzone_idx, can_try_harder,
- gfp_mask & __GFP_HIGH))
- continue;
-
- if (wait && !cpuset_zone_allowed(z, gfp_mask))
- continue;
-
- page = buffered_rmqueue(z, order, gfp_mask);
- if (page)
- goto got_pg;
- }
+ page = get_page_from_freelist(gfp_mask, order, zonelist, alloc_flags);
+ if (page)
+ goto got_pg;
/* This allocation should allow future memory freeing. */
if (((p->flags & PF_MEMALLOC) || unlikely(test_thread_flag(TIF_MEMDIE)))
&& !in_interrupt()) {
if (!(gfp_mask & __GFP_NOMEMALLOC)) {
+nofail_alloc:
/* go through the zonelist yet again, ignoring mins */
- for (i = 0; (z = zones[i]) != NULL; i++) {
- if (!cpuset_zone_allowed(z, gfp_mask))
- continue;
- page = buffered_rmqueue(z, order, gfp_mask);
- if (page)
- goto got_pg;
+ page = get_page_from_freelist(gfp_mask, order,
+ zonelist, ALLOC_NO_WATERMARKS|ALLOC_CPUSET);
+ if (page)
+ goto got_pg;
+ if (gfp_mask & __GFP_NOFAIL) {
+ blk_congestion_wait(WRITE, HZ/50);
+ goto nofail_alloc;
}
}
goto nopage;
reclaim_state.reclaimed_slab = 0;
p->reclaim_state = &reclaim_state;
- did_some_progress = try_to_free_pages(zones, gfp_mask);
+ did_some_progress = try_to_free_pages(zonelist->zones, gfp_mask);
p->reclaim_state = NULL;
p->flags &= ~PF_MEMALLOC;
cond_resched();
if (likely(did_some_progress)) {
- for (i = 0; (z = zones[i]) != NULL; i++) {
- if (!zone_watermark_ok(z, order, z->pages_min,
- classzone_idx, can_try_harder,
- gfp_mask & __GFP_HIGH))
- continue;
-
- if (!cpuset_zone_allowed(z, gfp_mask))
- continue;
-
- page = buffered_rmqueue(z, order, gfp_mask);
- if (page)
- goto got_pg;
- }
+ page = get_page_from_freelist(gfp_mask, order,
+ zonelist, alloc_flags);
+ if (page)
+ goto got_pg;
} else if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) {
/*
* Go through the zonelist yet one more time, keep
* a parallel oom killing, we must fail if we're still
* under heavy pressure.
*/
- for (i = 0; (z = zones[i]) != NULL; i++) {
- if (!zone_watermark_ok(z, order, z->pages_high,
- classzone_idx, 0, 0))
- continue;
-
- if (!cpuset_zone_allowed(z, __GFP_HARDWALL))
- continue;
-
- page = buffered_rmqueue(z, order, gfp_mask);
- if (page)
- goto got_pg;
- }
+ page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
+ zonelist, ALLOC_WMARK_HIGH|ALLOC_CPUSET);
+ if (page)
+ goto got_pg;
out_of_memory(gfp_mask, order);
goto restart;
dump_stack();
show_mem();
}
- return NULL;
got_pg:
- zone_statistics(zonelist, z);
return page;
}
* get_zeroed_page() returns a 32-bit address, which cannot represent
* a highmem page
*/
- BUG_ON(gfp_mask & __GFP_HIGHMEM);
+ BUG_ON((gfp_mask & __GFP_HIGHMEM) != 0);
page = alloc_pages(gfp_mask | __GFP_ZERO, 0);
if (page)
fastcall void __free_pages(struct page *page, unsigned int order)
{
- if (!PageReserved(page) && put_page_testzero(page)) {
+ if (put_page_testzero(page)) {
if (order == 0)
free_hot_page(page);
else
*/
unsigned int nr_free_buffer_pages(void)
{
- return nr_free_zone_pages(GFP_USER & GFP_ZONEMASK);
+ return nr_free_zone_pages(gfp_zone(GFP_USER));
}
/*
*/
unsigned int nr_free_pagecache_pages(void)
{
- return nr_free_zone_pages(GFP_HIGHUSER & GFP_ZONEMASK);
+ return nr_free_zone_pages(gfp_zone(GFP_HIGHUSER));
}
#ifdef CONFIG_HIGHMEM
} else
printk("\n");
- for (cpu = 0; cpu < NR_CPUS; ++cpu) {
+ for_each_online_cpu(cpu) {
struct per_cpu_pageset *pageset;
- if (!cpu_possible(cpu))
- continue;
-
pageset = zone_pcp(zone, cpu);
for (temperature = 0; temperature < 2; temperature++)
zone = pgdat->node_zones + ZONE_NORMAL;
if (zone->present_pages)
zonelist->zones[j++] = zone;
+ case ZONE_DMA32:
+ zone = pgdat->node_zones + ZONE_DMA32;
+ if (zone->present_pages)
+ zonelist->zones[j++] = zone;
case ZONE_DMA:
zone = pgdat->node_zones + ZONE_DMA;
if (zone->present_pages)
return j;
}
+static inline int highest_zone(int zone_bits)
+{
+ int res = ZONE_NORMAL;
+ if (zone_bits & (__force int)__GFP_HIGHMEM)
+ res = ZONE_HIGHMEM;
+ if (zone_bits & (__force int)__GFP_DMA32)
+ res = ZONE_DMA32;
+ if (zone_bits & (__force int)__GFP_DMA)
+ res = ZONE_DMA;
+ return res;
+}
+
#ifdef CONFIG_NUMA
#define MAX_NODE_LOAD (num_online_nodes())
static int __initdata node_load[MAX_NUMNODES];
zonelist = pgdat->node_zonelists + i;
for (j = 0; zonelist->zones[j] != NULL; j++);
- k = ZONE_NORMAL;
- if (i & __GFP_HIGHMEM)
- k = ZONE_HIGHMEM;
- if (i & __GFP_DMA)
- k = ZONE_DMA;
+ k = highest_zone(i);
j = build_zonelists_node(NODE_DATA(node), zonelist, j, k);
zonelist->zones[j] = NULL;
zonelist = pgdat->node_zonelists + i;
j = 0;
- k = ZONE_NORMAL;
- if (i & __GFP_HIGHMEM)
- k = ZONE_HIGHMEM;
- if (i & __GFP_DMA)
- k = ZONE_DMA;
-
+ k = highest_zone(i);
j = build_zonelists_node(pgdat, zonelist, j, k);
/*
* Now we build the zonelist so that it contains the zones
* up by free_all_bootmem() once the early boot process is
* done. Non-atomic initialization, single-pass.
*/
-void __init memmap_init_zone(unsigned long size, int nid, unsigned long zone,
+void __devinit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
unsigned long start_pfn)
{
struct page *page;
continue;
page = pfn_to_page(pfn);
set_page_links(page, zone, nid, pfn);
- set_page_count(page, 0);
+ set_page_count(page, 1);
reset_page_mapcount(page);
SetPageReserved(page);
INIT_LIST_HEAD(&page->lru);
/*
* The per-cpu-pages pools are set to around 1000th of the
- * size of the zone. But no more than 1/4 of a meg - there's
- * no point in going beyond the size of L2 cache.
+ * size of the zone. But no more than 1/2 of a meg.
*
* OK, so we don't know how big the cache is. So guess.
*/
batch = zone->present_pages / 1024;
- if (batch * PAGE_SIZE > 256 * 1024)
- batch = (256 * 1024) / PAGE_SIZE;
+ if (batch * PAGE_SIZE > 512 * 1024)
+ batch = (512 * 1024) / PAGE_SIZE;
batch /= 4; /* We effectively *= 4 below */
if (batch < 1)
batch = 1;
* of pages of one half of the possible page colors
* and the other with pages of the other colors.
*/
- batch = (1 << fls(batch + batch/2)) - 1;
+ batch = (1 << (fls(batch + batch/2)-1)) - 1;
+
return batch;
}
pcp = &p->pcp[0]; /* hot */
pcp->count = 0;
- pcp->low = 2 * batch;
+ pcp->low = 0;
pcp->high = 6 * batch;
pcp->batch = max(1UL, 1 * batch);
INIT_LIST_HEAD(&pcp->list);
pcp->count = 0;
pcp->low = 0;
pcp->high = 2 * batch;
- pcp->batch = max(1UL, 1 * batch);
+ pcp->batch = max(1UL, batch/2);
INIT_LIST_HEAD(&pcp->list);
}
if (process_zones(cpu))
ret = NOTIFY_BAD;
break;
-#ifdef CONFIG_HOTPLUG_CPU
+ case CPU_UP_CANCELED:
case CPU_DEAD:
free_zone_pagesets(cpu);
break;
-#endif
default:
break;
}
static struct notifier_block pageset_notifier =
{ &pageset_cpuup_callback, NULL, 0 };
-void __init setup_per_cpu_pageset()
+void __init setup_per_cpu_pageset(void)
{
int err;
#endif
+static __devinit
+void zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages)
+{
+ int i;
+ struct pglist_data *pgdat = zone->zone_pgdat;
+
+ /*
+ * The per-page waitqueue mechanism uses hashed waitqueues
+ * per zone.
+ */
+ zone->wait_table_size = wait_table_size(zone_size_pages);
+ zone->wait_table_bits = wait_table_bits(zone->wait_table_size);
+ zone->wait_table = (wait_queue_head_t *)
+ alloc_bootmem_node(pgdat, zone->wait_table_size
+ * sizeof(wait_queue_head_t));
+
+ for(i = 0; i < zone->wait_table_size; ++i)
+ init_waitqueue_head(zone->wait_table + i);
+}
+
+static __devinit void zone_pcp_init(struct zone *zone)
+{
+ int cpu;
+ unsigned long batch = zone_batchsize(zone);
+
+ for (cpu = 0; cpu < NR_CPUS; cpu++) {
+#ifdef CONFIG_NUMA
+ /* Early boot. Slab allocator not functional yet */
+ zone->pageset[cpu] = &boot_pageset[cpu];
+ setup_pageset(&boot_pageset[cpu],0);
+#else
+ setup_pageset(zone_pcp(zone,cpu), batch);
+#endif
+ }
+ printk(KERN_DEBUG " %s zone: %lu pages, LIFO batch:%lu\n",
+ zone->name, zone->present_pages, batch);
+}
+
+static __devinit void init_currently_empty_zone(struct zone *zone,
+ unsigned long zone_start_pfn, unsigned long size)
+{
+ struct pglist_data *pgdat = zone->zone_pgdat;
+
+ zone_wait_table_init(zone, size);
+ pgdat->nr_zones = zone_idx(zone) + 1;
+
+ zone->zone_mem_map = pfn_to_page(zone_start_pfn);
+ zone->zone_start_pfn = zone_start_pfn;
+
+ memmap_init(size, pgdat->node_id, zone_idx(zone), zone_start_pfn);
+
+ zone_init_free_lists(pgdat, zone, zone->spanned_pages);
+}
+
/*
* Set up the zone data structures:
* - mark all pages reserved
static void __init free_area_init_core(struct pglist_data *pgdat,
unsigned long *zones_size, unsigned long *zholes_size)
{
- unsigned long i, j;
- int cpu, nid = pgdat->node_id;
+ unsigned long j;
+ int nid = pgdat->node_id;
unsigned long zone_start_pfn = pgdat->node_start_pfn;
+ pgdat_resize_init(pgdat);
pgdat->nr_zones = 0;
init_waitqueue_head(&pgdat->kswapd_wait);
pgdat->kswapd_max_order = 0;
for (j = 0; j < MAX_NR_ZONES; j++) {
struct zone *zone = pgdat->node_zones + j;
unsigned long size, realsize;
- unsigned long batch;
realsize = size = zones_size[j];
if (zholes_size)
realsize -= zholes_size[j];
- if (j == ZONE_DMA || j == ZONE_NORMAL)
+ if (j < ZONE_HIGHMEM)
nr_kernel_pages += realsize;
nr_all_pages += realsize;
zone->name = zone_names[j];
spin_lock_init(&zone->lock);
spin_lock_init(&zone->lru_lock);
+ zone_seqlock_init(zone);
zone->zone_pgdat = pgdat;
zone->free_pages = 0;
zone->temp_priority = zone->prev_priority = DEF_PRIORITY;
- batch = zone_batchsize(zone);
-
- for (cpu = 0; cpu < NR_CPUS; cpu++) {
-#ifdef CONFIG_NUMA
- /* Early boot. Slab allocator not functional yet */
- zone->pageset[cpu] = &boot_pageset[cpu];
- setup_pageset(&boot_pageset[cpu],0);
-#else
- setup_pageset(zone_pcp(zone,cpu), batch);
-#endif
- }
- printk(KERN_DEBUG " %s zone: %lu pages, LIFO batch:%lu\n",
- zone_names[j], realsize, batch);
+ zone_pcp_init(zone);
INIT_LIST_HEAD(&zone->active_list);
INIT_LIST_HEAD(&zone->inactive_list);
zone->nr_scan_active = 0;
if (!size)
continue;
- /*
- * The per-page waitqueue mechanism uses hashed waitqueues
- * per zone.
- */
- zone->wait_table_size = wait_table_size(size);
- zone->wait_table_bits =
- wait_table_bits(zone->wait_table_size);
- zone->wait_table = (wait_queue_head_t *)
- alloc_bootmem_node(pgdat, zone->wait_table_size
- * sizeof(wait_queue_head_t));
-
- for(i = 0; i < zone->wait_table_size; ++i)
- init_waitqueue_head(zone->wait_table + i);
-
- pgdat->nr_zones = j+1;
-
- zone->zone_mem_map = pfn_to_page(zone_start_pfn);
- zone->zone_start_pfn = zone_start_pfn;
-
- memmap_init(size, nid, j, zone_start_pfn);
-
zonetable_add(zone, nid, j, zone_start_pfn, size);
-
+ init_currently_empty_zone(zone, zone_start_pfn, size);
zone_start_pfn += size;
-
- zone_init_free_lists(pgdat, zone, zone->spanned_pages);
}
}
* that the pages_{min,low,high} values for each zone are set correctly
* with respect to min_free_kbytes.
*/
-static void setup_per_zone_pages_min(void)
+void setup_per_zone_pages_min(void)
{
unsigned long pages_min = min_free_kbytes >> (PAGE_SHIFT - 10);
unsigned long lowmem_pages = 0;
}
for_each_zone(zone) {
+ unsigned long tmp;
spin_lock_irqsave(&zone->lru_lock, flags);
+ tmp = (pages_min * zone->present_pages) / lowmem_pages;
if (is_highmem(zone)) {
/*
- * Often, highmem doesn't need to reserve any pages.
- * But the pages_min/low/high values are also used for
- * batching up page reclaim activity so we need a
- * decent value here.
+ * __GFP_HIGH and PF_MEMALLOC allocations usually don't
+ * need highmem pages, so cap pages_min to a small
+ * value here.
+ *
+ * The (pages_high-pages_low) and (pages_low-pages_min)
+ * deltas controls asynch page reclaim, and so should
+ * not be capped for highmem.
*/
int min_pages;
min_pages = 128;
zone->pages_min = min_pages;
} else {
- /* if it's a lowmem zone, reserve a number of pages
+ /*
+ * If it's a lowmem zone, reserve a number of pages
* proportionate to the zone's size.
*/
- zone->pages_min = (pages_min * zone->present_pages) /
- lowmem_pages;
+ zone->pages_min = tmp;
}
- /*
- * When interpreting these watermarks, just keep in mind that:
- * zone->pages_min == (zone->pages_min * 4) / 4;
- */
- zone->pages_low = (zone->pages_min * 5) / 4;
- zone->pages_high = (zone->pages_min * 6) / 4;
+ zone->pages_low = zone->pages_min + tmp / 4;
+ zone->pages_high = zone->pages_min + tmp / 2;
spin_unlock_irqrestore(&zone->lru_lock, flags);
}
}
return ret;
}
+/* Take action when hardware reception checksum errors are detected. */
+#ifdef CONFIG_BUG
+void netdev_rx_csum_fault(struct net_device *dev)
+{
+ if (net_ratelimit()) {
+ printk(KERN_ERR "%s: hw csum failure.\n",
+ dev ? dev->name : "<unknown>");
+ dump_stack();
+ }
+}
+EXPORT_SYMBOL(netdev_rx_csum_fault);
+#endif
+
#ifdef CONFIG_HIGHMEM
/* Actually, we should eliminate this check as soon as we know, that:
* 1. IOMMU is present and allows to map all the memory.
dev->name);
dev->features &= ~NETIF_F_TSO;
}
+ if (dev->features & NETIF_F_UFO) {
+ if (!(dev->features & NETIF_F_HW_CSUM)) {
+ printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
+ "NETIF_F_HW_CSUM feature.\n",
+ dev->name);
+ dev->features &= ~NETIF_F_UFO;
+ }
+ if (!(dev->features & NETIF_F_SG)) {
+ printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
+ "NETIF_F_SG feature.\n",
+ dev->name);
+ dev->features &= ~NETIF_F_UFO;
+ }
+ }
/*
* nil rebuild_header routine,
* __alloc_skb - allocate a network buffer
* @size: size to allocate
* @gfp_mask: allocation mask
+ * @fclone: allocate from fclone cache instead of head cache
+ * and allocate a cloned (child) skb
*
* Allocate a new &sk_buff. The returned buffer has no headroom and a
* tail room of size bytes. The object has a reference count of one.
skb_shinfo(skb)->tso_size = 0;
skb_shinfo(skb)->tso_segs = 0;
skb_shinfo(skb)->frag_list = NULL;
+ skb_shinfo(skb)->ufo_size = 0;
+ skb_shinfo(skb)->ip6_frag_id = 0;
out:
return skb;
nodata:
skb_shinfo(skb)->tso_size = 0;
skb_shinfo(skb)->tso_segs = 0;
skb_shinfo(skb)->frag_list = NULL;
+ skb_shinfo(skb)->ufo_size = 0;
+ skb_shinfo(skb)->ip6_frag_id = 0;
out:
return skb;
nodata:
}
#ifdef CONFIG_NETFILTER
nf_conntrack_put(skb->nfct);
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+ nf_conntrack_put_reasm(skb->nfct_reasm);
+#endif
#ifdef CONFIG_BRIDGE_NETFILTER
nf_bridge_put(skb->nf_bridge);
#endif
C(nfct);
nf_conntrack_get(skb->nfct);
C(nfctinfo);
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+ C(nfct_reasm);
+ nf_conntrack_get_reasm(skb->nfct_reasm);
+#endif
#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
C(ipvs_property);
#endif
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+ C(nfct_reasm);
+ nf_conntrack_get_reasm(skb->nfct_reasm);
+#endif
#ifdef CONFIG_BRIDGE_NETFILTER
C(nf_bridge);
nf_bridge_get(skb->nf_bridge);
new->nfct = old->nfct;
nf_conntrack_get(old->nfct);
new->nfctinfo = old->nfctinfo;
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+ new->nfct_reasm = old->nfct_reasm;
+ nf_conntrack_get_reasm(old->nfct_reasm);
+#endif
#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
new->ipvs_property = old->ipvs_property;
#endif
return textsearch_find(config, state);
}
+/**
+ * skb_append_datato_frags: - append the user data to a skb
+ * @sk: sock structure
+ * @skb: skb structure to be appened with user data.
+ * @getfrag: call back function to be used for getting the user data
+ * @from: pointer to user message iov
+ * @length: length of the iov message
+ *
+ * Description: This procedure append the user data in the fragment part
+ * of the skb if any page alloc fails user this procedure returns -ENOMEM
+ */
+int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
+ int (*getfrag)(void *from, char *to, int offset,
+ int len, int odd, struct sk_buff *skb),
+ void *from, int length)
+{
+ int frg_cnt = 0;
+ skb_frag_t *frag = NULL;
+ struct page *page = NULL;
+ int copy, left;
+ int offset = 0;
+ int ret;
+
+ do {
+ /* Return error if we don't have space for new frag */
+ frg_cnt = skb_shinfo(skb)->nr_frags;
+ if (frg_cnt >= MAX_SKB_FRAGS)
+ return -EFAULT;
+
+ /* allocate a new page for next frag */
+ page = alloc_pages(sk->sk_allocation, 0);
+
+ /* If alloc_page fails just return failure and caller will
+ * free previous allocated pages by doing kfree_skb()
+ */
+ if (page == NULL)
+ return -ENOMEM;
+
+ /* initialize the next frag */
+ sk->sk_sndmsg_page = page;
+ sk->sk_sndmsg_off = 0;
+ skb_fill_page_desc(skb, frg_cnt, page, 0, 0);
+ skb->truesize += PAGE_SIZE;
+ atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc);
+
+ /* get the new initialized frag */
+ frg_cnt = skb_shinfo(skb)->nr_frags;
+ frag = &skb_shinfo(skb)->frags[frg_cnt - 1];
+
+ /* copy the user data to page */
+ left = PAGE_SIZE - frag->page_offset;
+ copy = (length > left)? left : length;
+
+ ret = getfrag(from, (page_address(frag->page) +
+ frag->page_offset + frag->size),
+ offset, copy, 0, skb);
+ if (ret < 0)
+ return -EFAULT;
+
+ /* copy was successful so update the size parameters */
+ sk->sk_sndmsg_off += copy;
+ frag->size += copy;
+ skb->len += copy;
+ skb->data_len += copy;
+ offset += copy;
+ length -= copy;
+
+ } while (length > 0);
+
+ return 0;
+}
+
void __init skb_init(void)
{
skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
EXPORT_SYMBOL(skb_seq_read);
EXPORT_SYMBOL(skb_abort_seq_read);
EXPORT_SYMBOL(skb_find_text);
+EXPORT_SYMBOL(skb_append_datato_frags);